Skip to content

Commit d19218e

Browse files
[SelectionDAG] Preserve fast math flags when legalizing/promoting (#130124)
When we have a floating-point operation that a target doesn't support for a given type, but does support for a wider type, then there are two ways this can be handled:

* If the target doesn't have any registers at all of this type then LegalizeTypes will convert the operation.
* If we do have registers but no operation for this type, then the operation action will be Promote and it's handled in PromoteNode.

In both cases the operation at the wider type, and the conversion operations to and from that type, should have the same fast math flags as the original operation.

This is being done in preparation for a DAGCombine patch which makes use of these fast math flags.
1 parent b47dac6 commit d19218e

File tree

7 files changed

+263
-28
lines changed

7 files changed

+263
-28
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,8 @@ struct SDNodeFlags {
421421

422422
PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint |
423423
NonNeg | NoNaNs | NoInfs | SameSign,
424+
FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal |
425+
AllowContract | ApproximateFuncs | AllowReassociation,
424426
};
425427

426428
/// Default constructor turns off all optimization flags.

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5075,6 +5075,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
50755075
if (Node->getOpcode() == ISD::BR_CC ||
50765076
Node->getOpcode() == ISD::SELECT_CC)
50775077
OVT = Node->getOperand(2).getSimpleValueType();
5078+
// Preserve fast math flags
5079+
SDNodeFlags FastMathFlags = Node->getFlags() & SDNodeFlags::FastMathFlags;
5080+
SelectionDAG::FlagInserter FlagsInserter(DAG, FastMathFlags);
50785081
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
50795082
SDLoc dl(Node);
50805083
SDValue Tmp1, Tmp2, Tmp3, Tmp4;

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,10 @@ bool DAGTypeLegalizer::run() {
233233
assert(N->getNodeId() == ReadyToProcess &&
234234
"Node should be ready if on worklist!");
235235

236+
// Preserve fast math flags
237+
SDNodeFlags FastMathFlags = N->getFlags() & SDNodeFlags::FastMathFlags;
238+
SelectionDAG::FlagInserter FlagsInserter(DAG, FastMathFlags);
239+
236240
LLVM_DEBUG(dbgs() << "\nLegalizing node: "; N->dump(&DAG));
237241
if (IgnoreNodeResults(N)) {
238242
LLVM_DEBUG(dbgs() << "Ignoring node results\n");
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-CVT
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-FP16
4+
5+
; Check that the output instructions have the same fast math flags as the input
6+
; fadd, even when fadd is promoted to float type.
7+
8+
define half @normal_fadd(half %x, half %y) {
9+
; CHECK-CVT-LABEL: name: normal_fadd
10+
; CHECK-CVT: bb.0.entry:
11+
; CHECK-CVT-NEXT: liveins: $h0, $h1
12+
; CHECK-CVT-NEXT: {{ $}}
13+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
14+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
15+
; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = nofpexcept FCVTSHr [[COPY]], implicit $fpcr
16+
; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = nofpexcept FCVTSHr [[COPY1]], implicit $fpcr
17+
; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr
18+
; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr
19+
; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]]
20+
; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0
21+
;
22+
; CHECK-FP16-LABEL: name: normal_fadd
23+
; CHECK-FP16: bb.0.entry:
24+
; CHECK-FP16-NEXT: liveins: $h0, $h1
25+
; CHECK-FP16-NEXT: {{ $}}
26+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
27+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
28+
; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr
29+
; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]]
30+
; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0
31+
entry:
32+
%add = fadd half %x, %y
33+
ret half %add
34+
}
35+
36+
define half @fast_fadd(half %x, half %y) {
37+
; CHECK-CVT-LABEL: name: fast_fadd
38+
; CHECK-CVT: bb.0.entry:
39+
; CHECK-CVT-NEXT: liveins: $h0, $h1
40+
; CHECK-CVT-NEXT: {{ $}}
41+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
42+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
43+
; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTSHr [[COPY]], implicit $fpcr
44+
; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTSHr [[COPY1]], implicit $fpcr
45+
; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr
46+
; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr
47+
; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]]
48+
; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0
49+
;
50+
; CHECK-FP16-LABEL: name: fast_fadd
51+
; CHECK-FP16: bb.0.entry:
52+
; CHECK-FP16-NEXT: liveins: $h0, $h1
53+
; CHECK-FP16-NEXT: {{ $}}
54+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
55+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
56+
; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr
57+
; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]]
58+
; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0
59+
entry:
60+
%add = fadd fast half %x, %y
61+
ret half %add
62+
}
63+
64+
define half @ninf_fadd(half %x, half %y) {
65+
; CHECK-CVT-LABEL: name: ninf_fadd
66+
; CHECK-CVT: bb.0.entry:
67+
; CHECK-CVT-NEXT: liveins: $h0, $h1
68+
; CHECK-CVT-NEXT: {{ $}}
69+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
70+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
71+
; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = ninf nofpexcept FCVTSHr [[COPY]], implicit $fpcr
72+
; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = ninf nofpexcept FCVTSHr [[COPY1]], implicit $fpcr
73+
; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr
74+
; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = ninf nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr
75+
; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]]
76+
; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0
77+
;
78+
; CHECK-FP16-LABEL: name: ninf_fadd
79+
; CHECK-FP16: bb.0.entry:
80+
; CHECK-FP16-NEXT: liveins: $h0, $h1
81+
; CHECK-FP16-NEXT: {{ $}}
82+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1
83+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0
84+
; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = ninf nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr
85+
; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]]
86+
; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0
87+
entry:
88+
%add = fadd ninf half %x, %y
89+
ret half %add
90+
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=arm -mattr=+vfp4d16sp,-fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-CVT
3+
; RUN: llc < %s -mtriple=arm -mattr=+vfp4d16sp,+fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-FP16
4+
5+
; Check that the output instructions have the same fast math flags as the input
6+
; fadd, even when f16 is legalized to f32.
7+
; FIXME: We don't get fast math flags on VCVTBHS because they get lost during a
8+
; DAGCombine transformation.
9+
; FIXME: We don't get fast math flags on VCVTBSH because the outermost node in
10+
; the isel pattern is COPY_TO_REGCLASS and the fast math flags end up there.
11+
12+
define half @normal_fadd(half %x, half %y) {
13+
; CHECK-CVT-LABEL: name: normal_fadd
14+
; CHECK-CVT: bb.0.entry:
15+
; CHECK-CVT-NEXT: liveins: $r0, $r1
16+
; CHECK-CVT-NEXT: {{ $}}
17+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1
18+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
19+
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
20+
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
21+
; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
22+
; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
23+
; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
24+
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
25+
; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
26+
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]]
27+
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
28+
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
29+
;
30+
; CHECK-FP16-LABEL: name: normal_fadd
31+
; CHECK-FP16: bb.0.entry:
32+
; CHECK-FP16-NEXT: liveins: $r0, $r1
33+
; CHECK-FP16-NEXT: {{ $}}
34+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1
35+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
36+
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
37+
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
38+
; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
39+
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
40+
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
41+
entry:
42+
%add = fadd half %x, %y
43+
ret half %add
44+
}
45+
46+
define half @fast_fadd(half %x, half %y) {
47+
; CHECK-CVT-LABEL: name: fast_fadd
48+
; CHECK-CVT: bb.0.entry:
49+
; CHECK-CVT-NEXT: liveins: $r0, $r1
50+
; CHECK-CVT-NEXT: {{ $}}
51+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1
52+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
53+
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
54+
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
55+
; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
56+
; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
57+
; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
58+
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
59+
; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
60+
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]]
61+
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
62+
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
63+
;
64+
; CHECK-FP16-LABEL: name: fast_fadd
65+
; CHECK-FP16: bb.0.entry:
66+
; CHECK-FP16-NEXT: liveins: $r0, $r1
67+
; CHECK-FP16-NEXT: {{ $}}
68+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1
69+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
70+
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
71+
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
72+
; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
73+
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
74+
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
75+
entry:
76+
%add = fadd fast half %x, %y
77+
ret half %add
78+
}
79+
80+
define half @ninf_fadd(half %x, half %y) {
81+
; CHECK-CVT-LABEL: name: ninf_fadd
82+
; CHECK-CVT: bb.0.entry:
83+
; CHECK-CVT-NEXT: liveins: $r0, $r1
84+
; CHECK-CVT-NEXT: {{ $}}
85+
; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1
86+
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
87+
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
88+
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
89+
; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
90+
; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
91+
; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
92+
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
93+
; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
94+
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]]
95+
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
96+
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
97+
;
98+
; CHECK-FP16-LABEL: name: ninf_fadd
99+
; CHECK-FP16: bb.0.entry:
100+
; CHECK-FP16-NEXT: liveins: $r0, $r1
101+
; CHECK-FP16-NEXT: {{ $}}
102+
; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1
103+
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
104+
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
105+
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
106+
; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
107+
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
108+
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
109+
entry:
110+
%add = fadd ninf half %x, %y
111+
ret half %add
112+
}

llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,37 +11,49 @@ define half @test_v4f16(<4 x half> %a) nounwind {
1111
; CHECK: @ %bb.0:
1212
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
1313
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
14-
; CHECK-NEXT: mov r8, #255
15-
; CHECK-NEXT: mov r4, r3
16-
; CHECK-NEXT: orr r8, r8, #65280
14+
; CHECK-NEXT: mov r4, #255
15+
; CHECK-NEXT: mov r8, r3
16+
; CHECK-NEXT: orr r4, r4, #65280
1717
; CHECK-NEXT: mov r5, r2
18-
; CHECK-NEXT: and r0, r0, r8
18+
; CHECK-NEXT: and r0, r0, r4
1919
; CHECK-NEXT: mov r6, r1
2020
; CHECK-NEXT: bl __aeabi_h2f
2121
; CHECK-NEXT: mov r7, r0
22-
; CHECK-NEXT: and r0, r6, r8
22+
; CHECK-NEXT: and r0, r6, r4
2323
; CHECK-NEXT: bl __aeabi_h2f
24-
; CHECK-NEXT: mov r1, r0
24+
; CHECK-NEXT: mov r6, r0
2525
; CHECK-NEXT: mov r0, r7
26-
; CHECK-NEXT: bl fmaxf
26+
; CHECK-NEXT: mov r1, r6
27+
; CHECK-NEXT: bl __aeabi_fcmpgt
28+
; CHECK-NEXT: cmp r0, #0
29+
; CHECK-NEXT: movne r6, r7
30+
; CHECK-NEXT: mov r0, r6
2731
; CHECK-NEXT: bl __aeabi_f2h
2832
; CHECK-NEXT: mov r6, r0
29-
; CHECK-NEXT: and r0, r5, r8
33+
; CHECK-NEXT: and r0, r5, r4
3034
; CHECK-NEXT: bl __aeabi_h2f
3135
; CHECK-NEXT: mov r5, r0
32-
; CHECK-NEXT: and r0, r6, r8
36+
; CHECK-NEXT: and r0, r6, r4
3337
; CHECK-NEXT: bl __aeabi_h2f
3438
; CHECK-NEXT: mov r1, r5
35-
; CHECK-NEXT: bl fmaxf
39+
; CHECK-NEXT: mov r6, r0
40+
; CHECK-NEXT: bl __aeabi_fcmpgt
41+
; CHECK-NEXT: cmp r0, #0
42+
; CHECK-NEXT: movne r5, r6
43+
; CHECK-NEXT: mov r0, r5
3644
; CHECK-NEXT: bl __aeabi_f2h
45+
; CHECK-NEXT: and r0, r0, r4
46+
; CHECK-NEXT: bl __aeabi_h2f
3747
; CHECK-NEXT: mov r5, r0
38-
; CHECK-NEXT: and r0, r4, r8
48+
; CHECK-NEXT: and r0, r8, r4
3949
; CHECK-NEXT: bl __aeabi_h2f
4050
; CHECK-NEXT: mov r4, r0
41-
; CHECK-NEXT: and r0, r5, r8
42-
; CHECK-NEXT: bl __aeabi_h2f
51+
; CHECK-NEXT: mov r0, r5
4352
; CHECK-NEXT: mov r1, r4
44-
; CHECK-NEXT: bl fmaxf
53+
; CHECK-NEXT: bl __aeabi_fcmpgt
54+
; CHECK-NEXT: cmp r0, #0
55+
; CHECK-NEXT: movne r4, r5
56+
; CHECK-NEXT: mov r0, r4
4557
; CHECK-NEXT: bl __aeabi_f2h
4658
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
4759
; CHECK-NEXT: mov pc, lr

llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,37 +11,49 @@ define half @test_v4f16(<4 x half> %a) nounwind {
1111
; CHECK: @ %bb.0:
1212
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
1313
; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
14-
; CHECK-NEXT: mov r8, #255
15-
; CHECK-NEXT: mov r4, r3
16-
; CHECK-NEXT: orr r8, r8, #65280
14+
; CHECK-NEXT: mov r4, #255
15+
; CHECK-NEXT: mov r8, r3
16+
; CHECK-NEXT: orr r4, r4, #65280
1717
; CHECK-NEXT: mov r5, r2
18-
; CHECK-NEXT: and r0, r0, r8
18+
; CHECK-NEXT: and r0, r0, r4
1919
; CHECK-NEXT: mov r6, r1
2020
; CHECK-NEXT: bl __aeabi_h2f
2121
; CHECK-NEXT: mov r7, r0
22-
; CHECK-NEXT: and r0, r6, r8
22+
; CHECK-NEXT: and r0, r6, r4
2323
; CHECK-NEXT: bl __aeabi_h2f
24-
; CHECK-NEXT: mov r1, r0
24+
; CHECK-NEXT: mov r6, r0
2525
; CHECK-NEXT: mov r0, r7
26-
; CHECK-NEXT: bl fminf
26+
; CHECK-NEXT: mov r1, r6
27+
; CHECK-NEXT: bl __aeabi_fcmplt
28+
; CHECK-NEXT: cmp r0, #0
29+
; CHECK-NEXT: movne r6, r7
30+
; CHECK-NEXT: mov r0, r6
2731
; CHECK-NEXT: bl __aeabi_f2h
2832
; CHECK-NEXT: mov r6, r0
29-
; CHECK-NEXT: and r0, r5, r8
33+
; CHECK-NEXT: and r0, r5, r4
3034
; CHECK-NEXT: bl __aeabi_h2f
3135
; CHECK-NEXT: mov r5, r0
32-
; CHECK-NEXT: and r0, r6, r8
36+
; CHECK-NEXT: and r0, r6, r4
3337
; CHECK-NEXT: bl __aeabi_h2f
3438
; CHECK-NEXT: mov r1, r5
35-
; CHECK-NEXT: bl fminf
39+
; CHECK-NEXT: mov r6, r0
40+
; CHECK-NEXT: bl __aeabi_fcmplt
41+
; CHECK-NEXT: cmp r0, #0
42+
; CHECK-NEXT: movne r5, r6
43+
; CHECK-NEXT: mov r0, r5
3644
; CHECK-NEXT: bl __aeabi_f2h
45+
; CHECK-NEXT: and r0, r0, r4
46+
; CHECK-NEXT: bl __aeabi_h2f
3747
; CHECK-NEXT: mov r5, r0
38-
; CHECK-NEXT: and r0, r4, r8
48+
; CHECK-NEXT: and r0, r8, r4
3949
; CHECK-NEXT: bl __aeabi_h2f
4050
; CHECK-NEXT: mov r4, r0
41-
; CHECK-NEXT: and r0, r5, r8
42-
; CHECK-NEXT: bl __aeabi_h2f
51+
; CHECK-NEXT: mov r0, r5
4352
; CHECK-NEXT: mov r1, r4
44-
; CHECK-NEXT: bl fminf
53+
; CHECK-NEXT: bl __aeabi_fcmplt
54+
; CHECK-NEXT: cmp r0, #0
55+
; CHECK-NEXT: movne r4, r5
56+
; CHECK-NEXT: mov r0, r4
4557
; CHECK-NEXT: bl __aeabi_f2h
4658
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
4759
; CHECK-NEXT: mov pc, lr

0 commit comments

Comments
 (0)