Skip to content

Commit f4c5cad

Browse files
committed
[ARM] Select f32 constants with vmov.f16
This adds lowering for f32 values using vmov.f16, which zeroes the top 16 bits of the register whilst setting the lower 16 bits to the encoded f16 pattern. This range of values does not often come up, except where an f16 constant value has been converted to an f32. Differential Revision: https://reviews.llvm.org/D87790
1 parent 2a77441 commit f4c5cad

File tree

5 files changed

+103
-53
lines changed

5 files changed

+103
-53
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18067,6 +18067,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1806718067
return false;
1806818068
if (VT == MVT::f16 && Subtarget->hasFullFP16())
1806918069
return ARM_AM::getFP16Imm(Imm) != -1;
18070+
if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
18071+
ARM_AM::getFP32FP16Imm(Imm) != -1)
18072+
return true;
1807018073
if (VT == MVT::f32)
1807118074
return ARM_AM::getFP32Imm(Imm) != -1;
1807218075
if (VT == MVT::f64 && Subtarget->hasFP64())

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>,
5454
let ParserMatchClass = FPImmOperand;
5555
}
5656

57+
def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
58+
APFloat InVal = N->getValueAPF();
59+
uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
60+
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
61+
}]>;
62+
63+
def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
64+
return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
65+
}], vfp_f32f16imm_xform>;
66+
5767
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
5868
APFloat InVal = N->getValueAPF();
5969
uint32_t enc = ARM_AM::getFP32Imm(InVal);
@@ -2637,6 +2647,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
26372647
}
26382648
}
26392649

2650+
def : Pat<(f32 (vfp_f32f16imm:$imm)),
2651+
(f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
2652+
let Predicates = [HasFullFP16];
2653+
}
2654+
26402655
//===----------------------------------------------------------------------===//
26412656
// Assembler aliases.
26422657
//

llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,18 @@ namespace ARM_AM {
687687
return getFP16Imm(FPImm.bitcastToAPInt());
688688
}
689689

690+
/// If this is an FP16Imm encoded as an fp32 value, return the 8-bit encoding
691+
/// for it. Otherwise return -1, like getFP16Imm.
692+
inline int getFP32FP16Imm(const APInt &Imm) {
693+
if (Imm.getActiveBits() > 16)
694+
return -1;
695+
return ARM_AM::getFP16Imm(Imm.trunc(16));
696+
}
697+
698+
inline int getFP32FP16Imm(const APFloat &FPImm) {
699+
return getFP32FP16Imm(FPImm.bitcastToAPInt());
700+
}
701+
690702
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
691703
/// floating-point value. If the value cannot be represented as an 8-bit
692704
/// floating-point value, then return -1.

llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll

Lines changed: 72 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
874874
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
875875
; CHECK-MVE-NEXT: push {r7, lr}
876876
; CHECK-MVE-NEXT: sub sp, #4
877-
; CHECK-MVE-NEXT: vldr s0, .LCPI11_0
877+
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
878878
; CHECK-MVE-NEXT: blx r0
879879
; CHECK-MVE-NEXT: vmov.f16 r0, s0
880880
; CHECK-MVE-NEXT: vmov s0, r0
@@ -884,10 +884,6 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
884884
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
885885
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
886886
; CHECK-MVE-NEXT: bxns lr
887-
; CHECK-MVE-NEXT: .p2align 2
888-
; CHECK-MVE-NEXT: @ %bb.1:
889-
; CHECK-MVE-NEXT: .LCPI11_0:
890-
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
891887
%call = call half %hptr(half 10.0) nounwind
892888
ret half %call
893889
}
@@ -931,25 +927,41 @@ define half @h2(half (half)* nocapture %hptr) nounwind {
931927
; CHECK-8M-NEXT: .LCPI12_0:
932928
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
933929
;
934-
; CHECK-81M-LABEL: h2:
935-
; CHECK-81M: @ %bb.0: @ %entry
936-
; CHECK-81M-NEXT: push {r7, lr}
937-
; CHECK-81M-NEXT: vldr s0, .LCPI12_0
938-
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
939-
; CHECK-81M-NEXT: bic r0, r0, #1
940-
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
941-
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
942-
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
943-
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
944-
; CHECK-81M-NEXT: blxns r0
945-
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
946-
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
947-
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
948-
; CHECK-81M-NEXT: pop {r7, pc}
949-
; CHECK-81M-NEXT: .p2align 2
950-
; CHECK-81M-NEXT: @ %bb.1:
951-
; CHECK-81M-NEXT: .LCPI12_0:
952-
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
930+
; CHECK-NO-MVE-LABEL: h2:
931+
; CHECK-NO-MVE: @ %bb.0: @ %entry
932+
; CHECK-NO-MVE-NEXT: push {r7, lr}
933+
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI12_0
934+
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
935+
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
936+
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
937+
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
938+
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
939+
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
940+
; CHECK-NO-MVE-NEXT: blxns r0
941+
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
942+
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
943+
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
944+
; CHECK-NO-MVE-NEXT: pop {r7, pc}
945+
; CHECK-NO-MVE-NEXT: .p2align 2
946+
; CHECK-NO-MVE-NEXT: @ %bb.1:
947+
; CHECK-NO-MVE-NEXT: .LCPI12_0:
948+
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
949+
;
950+
; CHECK-MVE-LABEL: h2:
951+
; CHECK-MVE: @ %bb.0: @ %entry
952+
; CHECK-MVE-NEXT: push {r7, lr}
953+
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
954+
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
955+
; CHECK-MVE-NEXT: bic r0, r0, #1
956+
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
957+
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
958+
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
959+
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
960+
; CHECK-MVE-NEXT: blxns r0
961+
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
962+
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
963+
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
964+
; CHECK-MVE-NEXT: pop {r7, pc}
953965
entry:
954966
%call = call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
955967
ret half %call
@@ -994,25 +1006,41 @@ define half @h3(half (half)* nocapture %hptr) nounwind {
9941006
; CHECK-8M-NEXT: .LCPI13_0:
9951007
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
9961008
;
997-
; CHECK-81M-LABEL: h3:
998-
; CHECK-81M: @ %bb.0: @ %entry
999-
; CHECK-81M-NEXT: push {r7, lr}
1000-
; CHECK-81M-NEXT: vldr s0, .LCPI13_0
1001-
; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
1002-
; CHECK-81M-NEXT: bic r0, r0, #1
1003-
; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1004-
; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
1005-
; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
1006-
; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
1007-
; CHECK-81M-NEXT: blxns r0
1008-
; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
1009-
; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1010-
; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
1011-
; CHECK-81M-NEXT: pop {r7, pc}
1012-
; CHECK-81M-NEXT: .p2align 2
1013-
; CHECK-81M-NEXT: @ %bb.1:
1014-
; CHECK-81M-NEXT: .LCPI13_0:
1015-
; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
1009+
; CHECK-NO-MVE-LABEL: h3:
1010+
; CHECK-NO-MVE: @ %bb.0: @ %entry
1011+
; CHECK-NO-MVE-NEXT: push {r7, lr}
1012+
; CHECK-NO-MVE-NEXT: vldr s0, .LCPI13_0
1013+
; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
1014+
; CHECK-NO-MVE-NEXT: bic r0, r0, #1
1015+
; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1016+
; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
1017+
; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
1018+
; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
1019+
; CHECK-NO-MVE-NEXT: blxns r0
1020+
; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
1021+
; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1022+
; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
1023+
; CHECK-NO-MVE-NEXT: pop {r7, pc}
1024+
; CHECK-NO-MVE-NEXT: .p2align 2
1025+
; CHECK-NO-MVE-NEXT: @ %bb.1:
1026+
; CHECK-NO-MVE-NEXT: .LCPI13_0:
1027+
; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
1028+
;
1029+
; CHECK-MVE-LABEL: h3:
1030+
; CHECK-MVE: @ %bb.0: @ %entry
1031+
; CHECK-MVE-NEXT: push {r7, lr}
1032+
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
1033+
; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
1034+
; CHECK-MVE-NEXT: bic r0, r0, #1
1035+
; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1036+
; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
1037+
; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
1038+
; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
1039+
; CHECK-MVE-NEXT: blxns r0
1040+
; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
1041+
; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
1042+
; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
1043+
; CHECK-MVE-NEXT: pop {r7, pc}
10161044
entry:
10171045
%call = tail call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
10181046
ret half %call
@@ -1123,7 +1151,7 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
11231151
; CHECK-MVE: @ %bb.0: @ %entry
11241152
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
11251153
; CHECK-MVE-NEXT: push {r6, r7, lr}
1126-
; CHECK-MVE-NEXT: vldr s0, .LCPI15_0
1154+
; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
11271155
; CHECK-MVE-NEXT: blx r0
11281156
; CHECK-MVE-NEXT: vmov.f16 r0, s0
11291157
; CHECK-MVE-NEXT: vmov s0, r0
@@ -1132,10 +1160,6 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
11321160
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
11331161
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
11341162
; CHECK-MVE-NEXT: bxns lr
1135-
; CHECK-MVE-NEXT: .p2align 2
1136-
; CHECK-MVE-NEXT: @ %bb.1:
1137-
; CHECK-MVE-NEXT: .LCPI15_0:
1138-
; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
11391163
entry:
11401164
%call = call half %hptr(half 10.0) nounwind
11411165
ret half %call

llvm/test/CodeGen/ARM/fp16-bitcast.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,8 @@ define half @constcall() {
152152
;
153153
; CHECK-FP16-HARD-LABEL: constcall:
154154
; CHECK-FP16-HARD: @ %bb.0: @ %entry
155-
; CHECK-FP16-HARD-NEXT: vldr s0, .LCPI4_0
155+
; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01
156156
; CHECK-FP16-HARD-NEXT: b ccc
157-
; CHECK-FP16-HARD-NEXT: .p2align 2
158-
; CHECK-FP16-HARD-NEXT: @ %bb.1:
159-
; CHECK-FP16-HARD-NEXT: .LCPI4_0:
160-
; CHECK-FP16-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41
161157
entry:
162158
%call = tail call fast half @ccc(half 0xH4900)
163159
ret half %call

0 commit comments

Comments
 (0)