Skip to content

Commit 567b4f3

Browse files
committed
Use FMA
1 parent d1c5a08 commit 567b4f3

File tree

4 files changed

+34
-44
lines changed

4 files changed

+34
-44
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17289,8 +17289,13 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
1728917289
(Flags.hasNoSignedZeros() || DAG.isKnownNonNegativeFP(N0))) {
1729017290
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
1729117291
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17292-
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17293-
return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17292+
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17293+
return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17294+
N1, N0);
17295+
} else {
17296+
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17297+
return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17298+
}
1729417299
}
1729517300

1729617301
return SDValue();

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5567,7 +5567,7 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
55675567

55685568
bool SelectionDAG::isKnownNonNegativeFP(SDValue Op) const {
55695569
if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
5570-
return !C1->isNegative();
5570+
return !C1->isNegative() && !C1->isNaN();
55715571

55725572
return Op.getOpcode() == ISD::FABS;
55735573
}

llvm/test/CodeGen/AArch64/frem-power2.ll

Lines changed: 25 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,9 @@ define float @frem2_nsz(float %x) {
1616
; CHECK-SD-LABEL: frem2_nsz:
1717
; CHECK-SD: // %bb.0: // %entry
1818
; CHECK-SD-NEXT: fmov s1, #2.00000000
19-
; CHECK-SD-NEXT: fdiv s1, s0, s1
20-
; CHECK-SD-NEXT: frintz s1, s1
21-
; CHECK-SD-NEXT: fadd s1, s1, s1
22-
; CHECK-SD-NEXT: fsub s0, s0, s1
19+
; CHECK-SD-NEXT: fdiv s2, s0, s1
20+
; CHECK-SD-NEXT: frintz s2, s2
21+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
2322
; CHECK-SD-NEXT: ret
2423
;
2524
; CHECK-GI-LABEL: frem2_nsz:
@@ -55,10 +54,9 @@ define float @frem2_abs(float %x) {
5554
; CHECK-SD: // %bb.0: // %entry
5655
; CHECK-SD-NEXT: fabs s0, s0
5756
; CHECK-SD-NEXT: fmov s1, #2.00000000
58-
; CHECK-SD-NEXT: fdiv s1, s0, s1
59-
; CHECK-SD-NEXT: frintz s1, s1
60-
; CHECK-SD-NEXT: fadd s1, s1, s1
61-
; CHECK-SD-NEXT: fsub s0, s0, s1
57+
; CHECK-SD-NEXT: fdiv s2, s0, s1
58+
; CHECK-SD-NEXT: frintz s2, s2
59+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
6260
; CHECK-SD-NEXT: ret
6361
;
6462
; CHECK-GI-LABEL: frem2_abs:
@@ -76,10 +74,10 @@ define half @hrem2_nsz(half %x) {
7674
; CHECK-SD-LABEL: hrem2_nsz:
7775
; CHECK-SD: // %bb.0: // %entry
7876
; CHECK-SD-NEXT: fmov h1, #2.00000000
77+
; CHECK-SD-NEXT: fmov h2, #-2.00000000
7978
; CHECK-SD-NEXT: fdiv h1, h0, h1
8079
; CHECK-SD-NEXT: frintz h1, h1
81-
; CHECK-SD-NEXT: fadd h1, h1, h1
82-
; CHECK-SD-NEXT: fsub h0, h0, h1
80+
; CHECK-SD-NEXT: fmadd h0, h1, h2, h0
8381
; CHECK-SD-NEXT: ret
8482
;
8583
; CHECK-GI-LABEL: hrem2_nsz:
@@ -103,10 +101,9 @@ define double @drem2_nsz(double %x) {
103101
; CHECK-SD-LABEL: drem2_nsz:
104102
; CHECK-SD: // %bb.0: // %entry
105103
; CHECK-SD-NEXT: fmov d1, #2.00000000
106-
; CHECK-SD-NEXT: fdiv d1, d0, d1
107-
; CHECK-SD-NEXT: frintz d1, d1
108-
; CHECK-SD-NEXT: fadd d1, d1, d1
109-
; CHECK-SD-NEXT: fsub d0, d0, d1
104+
; CHECK-SD-NEXT: fdiv d2, d0, d1
105+
; CHECK-SD-NEXT: frintz d2, d2
106+
; CHECK-SD-NEXT: fmsub d0, d2, d1, d0
110107
; CHECK-SD-NEXT: ret
111108
;
112109
; CHECK-GI-LABEL: drem2_nsz:
@@ -170,8 +167,7 @@ define float @fremm2_nsz(float %x) {
170167
; CHECK-SD-NEXT: fmov s1, #-2.00000000
171168
; CHECK-SD-NEXT: fdiv s2, s0, s1
172169
; CHECK-SD-NEXT: frintz s2, s2
173-
; CHECK-SD-NEXT: fmul s1, s2, s1
174-
; CHECK-SD-NEXT: fsub s0, s0, s1
170+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
175171
; CHECK-SD-NEXT: ret
176172
;
177173
; CHECK-GI-LABEL: fremm2_nsz:
@@ -190,8 +186,7 @@ define float @frem4_abs(float %x) {
190186
; CHECK-SD-NEXT: fmov s1, #4.00000000
191187
; CHECK-SD-NEXT: fdiv s2, s0, s1
192188
; CHECK-SD-NEXT: frintz s2, s2
193-
; CHECK-SD-NEXT: fmul s1, s2, s1
194-
; CHECK-SD-NEXT: fsub s0, s0, s1
189+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
195190
; CHECK-SD-NEXT: ret
196191
;
197192
; CHECK-GI-LABEL: frem4_abs:
@@ -212,8 +207,7 @@ define float @frem16_abs(float %x) {
212207
; CHECK-SD-NEXT: fmov s1, #16.00000000
213208
; CHECK-SD-NEXT: fdiv s2, s0, s1
214209
; CHECK-SD-NEXT: frintz s2, s2
215-
; CHECK-SD-NEXT: fmul s1, s2, s1
216-
; CHECK-SD-NEXT: fsub s0, s0, s1
210+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
217211
; CHECK-SD-NEXT: ret
218212
;
219213
; CHECK-GI-LABEL: frem16_abs:
@@ -235,8 +229,7 @@ define float @frem4294967296_abs(float %x) {
235229
; CHECK-SD-NEXT: fmov s1, w8
236230
; CHECK-SD-NEXT: fdiv s2, s0, s1
237231
; CHECK-SD-NEXT: frintz s2, s2
238-
; CHECK-SD-NEXT: fmul s1, s2, s1
239-
; CHECK-SD-NEXT: fsub s0, s0, s1
232+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
240233
; CHECK-SD-NEXT: ret
241234
;
242235
; CHECK-GI-LABEL: frem4294967296_abs:
@@ -259,8 +252,7 @@ define float @frem1152921504606846976_abs(float %x) {
259252
; CHECK-SD-NEXT: fmov s1, w8
260253
; CHECK-SD-NEXT: fdiv s2, s0, s1
261254
; CHECK-SD-NEXT: frintz s2, s2
262-
; CHECK-SD-NEXT: fmul s1, s2, s1
263-
; CHECK-SD-NEXT: fsub s0, s0, s1
255+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
264256
; CHECK-SD-NEXT: ret
265257
;
266258
; CHECK-GI-LABEL: frem1152921504606846976_abs:
@@ -283,8 +275,7 @@ define float @frem4611686018427387904_abs(float %x) {
283275
; CHECK-SD-NEXT: fmov s1, w8
284276
; CHECK-SD-NEXT: fdiv s2, s0, s1
285277
; CHECK-SD-NEXT: frintz s2, s2
286-
; CHECK-SD-NEXT: fmul s1, s2, s1
287-
; CHECK-SD-NEXT: fsub s0, s0, s1
278+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
288279
; CHECK-SD-NEXT: ret
289280
;
290281
; CHECK-GI-LABEL: frem4611686018427387904_abs:
@@ -306,8 +297,7 @@ define float @frem9223372036854775808_abs(float %x) {
306297
; CHECK-SD-NEXT: fabs s0, s0
307298
; CHECK-SD-NEXT: fdiv s2, s0, s1
308299
; CHECK-SD-NEXT: frintz s2, s2
309-
; CHECK-SD-NEXT: fmul s1, s2, s1
310-
; CHECK-SD-NEXT: fsub s0, s0, s1
300+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
311301
; CHECK-SD-NEXT: ret
312302
;
313303
; CHECK-GI-LABEL: frem9223372036854775808_abs:
@@ -325,10 +315,9 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
325315
; CHECK-SD-LABEL: frem2_nsz_vec:
326316
; CHECK-SD: // %bb.0: // %entry
327317
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
328-
; CHECK-SD-NEXT: fdiv v1.4s, v0.4s, v1.4s
329-
; CHECK-SD-NEXT: frintz v1.4s, v1.4s
330-
; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v1.4s
331-
; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
318+
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
319+
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
320+
; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
332321
; CHECK-SD-NEXT: ret
333322
;
334323
; CHECK-GI-LABEL: frem2_nsz_vec:
@@ -388,8 +377,7 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
388377
; CHECK-SD-NEXT: dup v1.4s, w8
389378
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
390379
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
391-
; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v1.4s
392-
; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v1.4s
380+
; CHECK-SD-NEXT: fmls v0.4s, v1.4s, v2.4s
393381
; CHECK-SD-NEXT: ret
394382
;
395383
; CHECK-GI-LABEL: frem1152921504606846976_absv:
@@ -454,8 +442,7 @@ define float @frem2_nsz_sitofp(float %x, i32 %sa) {
454442
; CHECK-SD-NEXT: scvtf s1, w8
455443
; CHECK-SD-NEXT: fdiv s2, s0, s1
456444
; CHECK-SD-NEXT: frintz s2, s2
457-
; CHECK-SD-NEXT: fmul s1, s2, s1
458-
; CHECK-SD-NEXT: fsub s0, s0, s1
445+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
459446
; CHECK-SD-NEXT: ret
460447
;
461448
; CHECK-GI-LABEL: frem2_nsz_sitofp:
@@ -479,8 +466,7 @@ define float @frem2_nsz_uitofp(float %x, i32 %sa) {
479466
; CHECK-SD-NEXT: ucvtf s1, w8
480467
; CHECK-SD-NEXT: fdiv s2, s0, s1
481468
; CHECK-SD-NEXT: frintz s2, s2
482-
; CHECK-SD-NEXT: fmul s1, s2, s1
483-
; CHECK-SD-NEXT: fsub s0, s0, s1
469+
; CHECK-SD-NEXT: fmsub s0, s2, s1, s0
484470
; CHECK-SD-NEXT: ret
485471
;
486472
; CHECK-GI-LABEL: frem2_nsz_uitofp:
@@ -505,8 +491,7 @@ define float @frem2_const_sitofp(float %x, i32 %sa) {
505491
; CHECK-SD-NEXT: scvtf s0, w8
506492
; CHECK-SD-NEXT: fdiv s2, s1, s0
507493
; CHECK-SD-NEXT: frintz s2, s2
508-
; CHECK-SD-NEXT: fmul s0, s2, s0
509-
; CHECK-SD-NEXT: fsub s0, s1, s0
494+
; CHECK-SD-NEXT: fmsub s0, s2, s0, s1
510495
; CHECK-SD-NEXT: ret
511496
;
512497
; CHECK-GI-LABEL: frem2_const_sitofp:

llvm/test/CodeGen/ARM/frem-power2.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ define float @frem4_nsz(float %x) {
4141
; CHECK-FP-NEXT: vmov s2, r0
4242
; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
4343
; CHECK-FP-NEXT: vrintz.f32 s4, s4
44-
; CHECK-FP-NEXT: vmls.f32 s2, s4, s0
44+
; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
4545
; CHECK-FP-NEXT: vmov r0, s2
4646
; CHECK-FP-NEXT: bx lr
4747
;

0 commit comments

Comments
 (0)