Skip to content

Commit 4472648

Browse files
committed
[ARM] Expand bf16 expanding/rounding fp loads/stores
As with the other floating-point types, bf16 extending loads and truncating stores should be marked Expand so that legalization does not create nodes that are illegal on Arm.
1 parent 4111841 commit 4472648

File tree

2 files changed

+102
-8
lines changed

2 files changed

+102
-8
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,12 +1113,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
11131113
for (MVT VT : MVT::fp_valuetypes()) {
11141114
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
11151115
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1116+
setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
11161117
}
11171118

11181119
// ... or truncating stores
11191120
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
11201121
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
11211122
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1123+
setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
1124+
setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
11221125

11231126
// ARM does not have i1 sign extending load.
11241127
for (MVT VT : MVT::integer_valuetypes())

llvm/test/CodeGen/Thumb2/bf16-instructions.ll

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,55 @@ define void @test_store(bfloat %a, ptr %b) {
220220
ret void
221221
}
222222

223+
define void @test_truncstore32(float %a, ptr %b) {
224+
; CHECK-NOFP-LABEL: test_truncstore32:
225+
; CHECK-NOFP: @ %bb.0:
226+
; CHECK-NOFP-NEXT: .save {r4, lr}
227+
; CHECK-NOFP-NEXT: push {r4, lr}
228+
; CHECK-NOFP-NEXT: mov r4, r1
229+
; CHECK-NOFP-NEXT: bl __truncsfbf2
230+
; CHECK-NOFP-NEXT: strh r0, [r4]
231+
; CHECK-NOFP-NEXT: pop {r4, pc}
232+
;
233+
; CHECK-FP-LABEL: test_truncstore32:
234+
; CHECK-FP: @ %bb.0:
235+
; CHECK-FP-NEXT: .save {r4, lr}
236+
; CHECK-FP-NEXT: push {r4, lr}
237+
; CHECK-FP-NEXT: mov r4, r0
238+
; CHECK-FP-NEXT: bl __truncsfbf2
239+
; CHECK-FP-NEXT: vmov r0, s0
240+
; CHECK-FP-NEXT: strh r0, [r4]
241+
; CHECK-FP-NEXT: pop {r4, pc}
242+
%r = fptrunc float %a to bfloat
243+
store bfloat %r, ptr %b
244+
ret void
245+
}
246+
247+
define void @test_truncstore64(double %a, ptr %b) {
248+
; CHECK-NOFP-LABEL: test_truncstore64:
249+
; CHECK-NOFP: @ %bb.0:
250+
; CHECK-NOFP-NEXT: .save {r4, lr}
251+
; CHECK-NOFP-NEXT: push {r4, lr}
252+
; CHECK-NOFP-NEXT: mov r4, r2
253+
; CHECK-NOFP-NEXT: bl __truncdfbf2
254+
; CHECK-NOFP-NEXT: strh r0, [r4]
255+
; CHECK-NOFP-NEXT: pop {r4, pc}
256+
;
257+
; CHECK-FP-LABEL: test_truncstore64:
258+
; CHECK-FP: @ %bb.0:
259+
; CHECK-FP-NEXT: .save {r4, lr}
260+
; CHECK-FP-NEXT: push {r4, lr}
261+
; CHECK-FP-NEXT: mov r4, r0
262+
; CHECK-FP-NEXT: vmov r0, r1, d0
263+
; CHECK-FP-NEXT: bl __aeabi_d2f
264+
; CHECK-FP-NEXT: lsrs r0, r0, #16
265+
; CHECK-FP-NEXT: strh r0, [r4]
266+
; CHECK-FP-NEXT: pop {r4, pc}
267+
%r = fptrunc double %a to bfloat
268+
store bfloat %r, ptr %b
269+
ret void
270+
}
271+
223272
define bfloat @test_load(ptr %a) {
224273
; CHECK-NOFP-LABEL: test_load:
225274
; CHECK-NOFP: @ %bb.0:
@@ -235,6 +284,48 @@ define bfloat @test_load(ptr %a) {
235284
ret bfloat %r
236285
}
237286

287+
define float @test_loadext32(ptr %a) {
288+
; CHECK-NOFP-LABEL: test_loadext32:
289+
; CHECK-NOFP: @ %bb.0:
290+
; CHECK-NOFP-NEXT: ldrh r0, [r0]
291+
; CHECK-NOFP-NEXT: lsls r0, r0, #16
292+
; CHECK-NOFP-NEXT: bx lr
293+
;
294+
; CHECK-FP-LABEL: test_loadext32:
295+
; CHECK-FP: @ %bb.0:
296+
; CHECK-FP-NEXT: ldrh r0, [r0]
297+
; CHECK-FP-NEXT: lsls r0, r0, #16
298+
; CHECK-FP-NEXT: vmov s0, r0
299+
; CHECK-FP-NEXT: bx lr
300+
%r = load bfloat, ptr %a
301+
%d = fpext bfloat %r to float
302+
ret float %d
303+
}
304+
305+
define double @test_loadext64(ptr %a) {
306+
; CHECK-NOFP-LABEL: test_loadext64:
307+
; CHECK-NOFP: @ %bb.0:
308+
; CHECK-NOFP-NEXT: .save {r7, lr}
309+
; CHECK-NOFP-NEXT: push {r7, lr}
310+
; CHECK-NOFP-NEXT: ldrh r0, [r0]
311+
; CHECK-NOFP-NEXT: lsls r0, r0, #16
312+
; CHECK-NOFP-NEXT: bl __aeabi_f2d
313+
; CHECK-NOFP-NEXT: pop {r7, pc}
314+
;
315+
; CHECK-FP-LABEL: test_loadext64:
316+
; CHECK-FP: @ %bb.0:
317+
; CHECK-FP-NEXT: .save {r7, lr}
318+
; CHECK-FP-NEXT: push {r7, lr}
319+
; CHECK-FP-NEXT: ldrh r0, [r0]
320+
; CHECK-FP-NEXT: lsls r0, r0, #16
321+
; CHECK-FP-NEXT: bl __aeabi_f2d
322+
; CHECK-FP-NEXT: vmov d0, r0, r1
323+
; CHECK-FP-NEXT: pop {r7, pc}
324+
%r = load bfloat, ptr %a
325+
%d = fpext bfloat %r to double
326+
ret double %d
327+
}
328+
238329
declare bfloat @test_callee(bfloat %a, bfloat %b)
239330

240331
define bfloat @test_call(bfloat %a, bfloat %b) {
@@ -867,8 +958,8 @@ define void @test_fccmp(bfloat %in, ptr %out) {
867958
; CHECK-FP-LABEL: test_fccmp:
868959
; CHECK-FP: @ %bb.0:
869960
; CHECK-FP-NEXT: vmov r1, s0
870-
; CHECK-FP-NEXT: vldr s0, .LCPI30_0
871-
; CHECK-FP-NEXT: vldr s4, .LCPI30_1
961+
; CHECK-FP-NEXT: vldr s0, .LCPI34_0
962+
; CHECK-FP-NEXT: vldr s4, .LCPI34_1
872963
; CHECK-FP-NEXT: lsls r2, r1, #16
873964
; CHECK-FP-NEXT: vmov s2, r2
874965
; CHECK-FP-NEXT: mov.w r2, #17664
@@ -882,9 +973,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
882973
; CHECK-FP-NEXT: bx lr
883974
; CHECK-FP-NEXT: .p2align 2
884975
; CHECK-FP-NEXT: @ %bb.1:
885-
; CHECK-FP-NEXT: .LCPI30_0:
976+
; CHECK-FP-NEXT: .LCPI34_0:
886977
; CHECK-FP-NEXT: .long 0x45000000 @ float 2048
887-
; CHECK-FP-NEXT: .LCPI30_1:
978+
; CHECK-FP-NEXT: .LCPI34_1:
888979
; CHECK-FP-NEXT: .long 0x48000000 @ float 131072
889980
%cmp1 = fcmp ogt bfloat %in, 0xR4800
890981
%cmp2 = fcmp olt bfloat %in, 0xR4500
@@ -941,14 +1032,14 @@ define bfloat @test_phi(ptr %p1) {
9411032
; CHECK-NOFP-NEXT: push {r4, r5, r6, lr}
9421033
; CHECK-NOFP-NEXT: ldrh r6, [r0]
9431034
; CHECK-NOFP-NEXT: mov r4, r0
944-
; CHECK-NOFP-NEXT: .LBB32_1: @ %loop
1035+
; CHECK-NOFP-NEXT: .LBB36_1: @ %loop
9451036
; CHECK-NOFP-NEXT: @ =>This Inner Loop Header: Depth=1
9461037
; CHECK-NOFP-NEXT: mov r0, r4
9471038
; CHECK-NOFP-NEXT: mov r5, r6
9481039
; CHECK-NOFP-NEXT: ldrh r6, [r4]
9491040
; CHECK-NOFP-NEXT: bl test_dummy
9501041
; CHECK-NOFP-NEXT: lsls r0, r0, #31
951-
; CHECK-NOFP-NEXT: bne .LBB32_1
1042+
; CHECK-NOFP-NEXT: bne .LBB36_1
9521043
; CHECK-NOFP-NEXT: @ %bb.2: @ %return
9531044
; CHECK-NOFP-NEXT: mov r0, r5
9541045
; CHECK-NOFP-NEXT: pop {r4, r5, r6, pc}
@@ -962,15 +1053,15 @@ define bfloat @test_phi(ptr %p1) {
9621053
; CHECK-FP-NEXT: mov r4, r0
9631054
; CHECK-FP-NEXT: ldrh r0, [r0]
9641055
; CHECK-FP-NEXT: vmov s18, r0
965-
; CHECK-FP-NEXT: .LBB32_1: @ %loop
1056+
; CHECK-FP-NEXT: .LBB36_1: @ %loop
9661057
; CHECK-FP-NEXT: @ =>This Inner Loop Header: Depth=1
9671058
; CHECK-FP-NEXT: ldrh r0, [r4]
9681059
; CHECK-FP-NEXT: vmov.f32 s16, s18
9691060
; CHECK-FP-NEXT: vmov s18, r0
9701061
; CHECK-FP-NEXT: mov r0, r4
9711062
; CHECK-FP-NEXT: bl test_dummy
9721063
; CHECK-FP-NEXT: lsls r0, r0, #31
973-
; CHECK-FP-NEXT: bne .LBB32_1
1064+
; CHECK-FP-NEXT: bne .LBB36_1
9741065
; CHECK-FP-NEXT: @ %bb.2: @ %return
9751066
; CHECK-FP-NEXT: vmov.f32 s0, s16
9761067
; CHECK-FP-NEXT: vpop {d8, d9}

0 commit comments

Comments
 (0)