Skip to content

Commit e36a339

Browse files
SC llvm team
authored and committed
Merged main:4c98f5b439ddd204d8ff1e423104215ebd0e1720 into amd-gfx:bfb7a657b78d
Local branch amd-gfx bfb7a65 Merged main:ba8a2ade84f4c1bfc531fe3673470377c038f31d into amd-gfx:de5d17eeaae8 Remote branch main 4c98f5b [DAG] Use copysign in frem power-2 fold. (llvm#91751)
2 parents bfb7a65 + 4c98f5b commit e36a339

File tree

4 files changed

+118
-19
lines changed

4 files changed

+118
-19
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 499109
19+
#define LLVM_MAIN_REVISION 499110
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17386,15 +17386,20 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
1738617386
TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
1738717387
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
1738817388
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
17389-
DAG.isKnownToBeAPowerOfTwoFP(N1) &&
17390-
(Flags.hasNoSignedZeros() || DAG.cannotBeOrderedNegativeFP(N0))) {
17389+
DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17390+
bool NeedsCopySign =
17391+
!Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
1739117392
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
1739217393
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17393-
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
17394-
return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17395-
N1, N0);
17396-
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17397-
return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17394+
SDValue MLA;
17395+
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17396+
MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17397+
N1, N0);
17398+
} else {
17399+
SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17400+
MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17401+
}
17402+
return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
1739817403
}
1739917404

1740017405
return SDValue();

llvm/test/CodeGen/AArch64/frem-power2.ll

Lines changed: 85 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,22 @@
33
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
define float @frem2(float %x) {
6-
; CHECK-LABEL: frem2:
7-
; CHECK: // %bb.0: // %entry
8-
; CHECK-NEXT: fmov s1, #2.00000000
9-
; CHECK-NEXT: b fmodf
6+
; CHECK-SD-LABEL: frem2:
7+
; CHECK-SD: // %bb.0: // %entry
8+
; CHECK-SD-NEXT: fmov s1, #2.00000000
9+
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
10+
; CHECK-SD-NEXT: fdiv s2, s0, s1
11+
; CHECK-SD-NEXT: frintz s2, s2
12+
; CHECK-SD-NEXT: fmsub s1, s2, s1, s0
13+
; CHECK-SD-NEXT: mvni v2.4s, #128, lsl #24
14+
; CHECK-SD-NEXT: bit v0.16b, v1.16b, v2.16b
15+
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
16+
; CHECK-SD-NEXT: ret
17+
;
18+
; CHECK-GI-LABEL: frem2:
19+
; CHECK-GI: // %bb.0: // %entry
20+
; CHECK-GI-NEXT: fmov s1, #2.00000000
21+
; CHECK-GI-NEXT: b fmodf
1022
entry:
1123
%fmod = frem float %x, 2.0
1224
ret float %fmod
@@ -311,6 +323,67 @@ entry:
311323
ret float %fmod
312324
}
313325

326+
define <4 x float> @frem2_vec(<4 x float> %x) {
327+
; CHECK-SD-LABEL: frem2_vec:
328+
; CHECK-SD: // %bb.0: // %entry
329+
; CHECK-SD-NEXT: movi v1.4s, #64, lsl #24
330+
; CHECK-SD-NEXT: mov v3.16b, v0.16b
331+
; CHECK-SD-NEXT: fdiv v2.4s, v0.4s, v1.4s
332+
; CHECK-SD-NEXT: frintz v2.4s, v2.4s
333+
; CHECK-SD-NEXT: fmls v3.4s, v1.4s, v2.4s
334+
; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24
335+
; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b
336+
; CHECK-SD-NEXT: ret
337+
;
338+
; CHECK-GI-LABEL: frem2_vec:
339+
; CHECK-GI: // %bb.0: // %entry
340+
; CHECK-GI-NEXT: sub sp, sp, #80
341+
; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
342+
; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
343+
; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
344+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
345+
; CHECK-GI-NEXT: .cfi_offset w30, -8
346+
; CHECK-GI-NEXT: .cfi_offset b8, -16
347+
; CHECK-GI-NEXT: .cfi_offset b9, -24
348+
; CHECK-GI-NEXT: .cfi_offset b10, -32
349+
; CHECK-GI-NEXT: fmov s1, #2.00000000
350+
; CHECK-GI-NEXT: mov s8, v0.s[1]
351+
; CHECK-GI-NEXT: mov s9, v0.s[2]
352+
; CHECK-GI-NEXT: mov s10, v0.s[3]
353+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
354+
; CHECK-GI-NEXT: bl fmodf
355+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
356+
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
357+
; CHECK-GI-NEXT: fmov s1, #2.00000000
358+
; CHECK-GI-NEXT: fmov s0, s8
359+
; CHECK-GI-NEXT: bl fmodf
360+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
361+
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
362+
; CHECK-GI-NEXT: fmov s1, #2.00000000
363+
; CHECK-GI-NEXT: fmov s0, s9
364+
; CHECK-GI-NEXT: bl fmodf
365+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
366+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
367+
; CHECK-GI-NEXT: fmov s1, #2.00000000
368+
; CHECK-GI-NEXT: fmov s0, s10
369+
; CHECK-GI-NEXT: bl fmodf
370+
; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload
371+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
372+
; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
373+
; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
374+
; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
375+
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
376+
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
377+
; CHECK-GI-NEXT: mov v1.s[2], v2.s[0]
378+
; CHECK-GI-NEXT: mov v1.s[3], v0.s[0]
379+
; CHECK-GI-NEXT: mov v0.16b, v1.16b
380+
; CHECK-GI-NEXT: add sp, sp, #80
381+
; CHECK-GI-NEXT: ret
382+
entry:
383+
%fmod = frem <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
384+
ret <4 x float> %fmod
385+
}
386+
314387
define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
315388
; CHECK-SD-LABEL: frem2_nsz_vec:
316389
; CHECK-SD: // %bb.0: // %entry
@@ -514,10 +587,15 @@ define float @frem2_constneg_sitofp(float %x, i32 %sa) {
514587
; CHECK-SD-LABEL: frem2_constneg_sitofp:
515588
; CHECK-SD: // %bb.0: // %entry
516589
; CHECK-SD-NEXT: mov w8, #1 // =0x1
517-
; CHECK-SD-NEXT: fmov s0, #-12.50000000
590+
; CHECK-SD-NEXT: fmov s1, #-12.50000000
518591
; CHECK-SD-NEXT: lsl w8, w8, w0
519-
; CHECK-SD-NEXT: scvtf s1, w8
520-
; CHECK-SD-NEXT: b fmodf
592+
; CHECK-SD-NEXT: scvtf s0, w8
593+
; CHECK-SD-NEXT: fdiv s2, s1, s0
594+
; CHECK-SD-NEXT: frintz s2, s2
595+
; CHECK-SD-NEXT: fmsub s0, s2, s0, s1
596+
; CHECK-SD-NEXT: fabs s0, s0
597+
; CHECK-SD-NEXT: fneg s0, s0
598+
; CHECK-SD-NEXT: ret
521599
;
522600
; CHECK-GI-LABEL: frem2_constneg_sitofp:
523601
; CHECK-GI: // %bb.0: // %entry

llvm/test/CodeGen/ARM/frem-power2.ll

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,29 @@ define float @frem4(float %x) {
1414
;
1515
; CHECK-FP-LABEL: frem4:
1616
; CHECK-FP: @ %bb.0: @ %entry
17-
; CHECK-FP-NEXT: mov.w r1, #1082130432
18-
; CHECK-FP-NEXT: b fmodf
17+
; CHECK-FP-NEXT: vmov.f32 s0, #4.000000e+00
18+
; CHECK-FP-NEXT: vmov s2, r0
19+
; CHECK-FP-NEXT: lsrs r0, r0, #31
20+
; CHECK-FP-NEXT: vdiv.f32 s4, s2, s0
21+
; CHECK-FP-NEXT: vrintz.f32 s4, s4
22+
; CHECK-FP-NEXT: vfms.f32 s2, s4, s0
23+
; CHECK-FP-NEXT: vmov r1, s2
24+
; CHECK-FP-NEXT: bfi r1, r0, #31, #1
25+
; CHECK-FP-NEXT: mov r0, r1
26+
; CHECK-FP-NEXT: bx lr
1927
;
2028
; CHECK-M33-LABEL: frem4:
2129
; CHECK-M33: @ %bb.0: @ %entry
22-
; CHECK-M33-NEXT: mov.w r1, #1082130432
23-
; CHECK-M33-NEXT: b fmodf
30+
; CHECK-M33-NEXT: vmov.f32 s0, #4.000000e+00
31+
; CHECK-M33-NEXT: vmov s2, r0
32+
; CHECK-M33-NEXT: lsrs r0, r0, #31
33+
; CHECK-M33-NEXT: vdiv.f32 s4, s2, s0
34+
; CHECK-M33-NEXT: vrintz.f32 s4, s4
35+
; CHECK-M33-NEXT: vmls.f32 s2, s4, s0
36+
; CHECK-M33-NEXT: vmov r1, s2
37+
; CHECK-M33-NEXT: bfi r1, r0, #31, #1
38+
; CHECK-M33-NEXT: mov r0, r1
39+
; CHECK-M33-NEXT: bx lr
2440
entry:
2541
%fmod = frem float %x, 4.0
2642
ret float %fmod

0 commit comments

Comments
 (0)