Skip to content

Commit 79931a4

Browse files
committed
preserve fast-math flags when lowering frem
1 parent b277841 commit 79931a4

File tree

2 files changed

+40
-12
lines changed

2 files changed

+40
-12
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2857,15 +2857,16 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
28572857
SDValue X = Op->getOperand(0);
28582858
SDValue Y = Op->getOperand(1);
28592859
EVT Ty = Op.getValueType();
2860+
SDNodeFlags Flags = Op->getFlags();
28602861

2861-
SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y);
2862-
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div);
2863-
SDValue Mul =
2864-
DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y, SDNodeFlags::AllowContract);
2865-
SDValue Sub =
2866-
DAG.getNode(ISD::FSUB, DL, Ty, X, Mul, SDNodeFlags::AllowContract);
2862+
SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y, Flags);
2863+
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div, Flags);
2864+
SDValue Mul = DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y,
2865+
Flags | SDNodeFlags::AllowContract);
2866+
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
2867+
Flags | SDNodeFlags::AllowContract);
28672868

2868-
if (AllowUnsafeFPMath || Op->getFlags().hasNoInfs())
2869+
if (AllowUnsafeFPMath || Flags.hasNoInfs())
28692870
return Sub;
28702871

28712872
// If Y is infinite, return X

llvm/test/CodeGen/NVPTX/frem.ll

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -222,25 +222,52 @@ define double @frem_f64_ninf(double %a, double %b) {
222222
ret double %r
223223
}
224224

225-
define float @frem_f32_imm1(float %a) {
226-
; FAST-LABEL: frem_f32_imm1(
225+
define float @frem_f32_imm1_fast(float %a) {
226+
; FAST-LABEL: frem_f32_imm1_fast(
227227
; FAST: {
228228
; FAST-NEXT: .reg .b32 %r<5>;
229229
; FAST-EMPTY:
230230
; FAST-NEXT: // %bb.0:
231-
; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_param_0];
231+
; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
232232
; FAST-NEXT: mul.f32 %r2, %r1, 0f3E124925;
233233
; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2;
234234
; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
235235
; FAST-NEXT: st.param.b32 [func_retval0], %r4;
236236
; FAST-NEXT: ret;
237237
;
238-
; NORMAL-LABEL: frem_f32_imm1(
238+
; NORMAL-LABEL: frem_f32_imm1_fast(
239239
; NORMAL: {
240240
; NORMAL-NEXT: .reg .b32 %r<5>;
241241
; NORMAL-EMPTY:
242242
; NORMAL-NEXT: // %bb.0:
243-
; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_param_0];
243+
; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_fast_param_0];
244+
; NORMAL-NEXT: mul.rn.f32 %r2, %r1, 0f3E124925;
245+
; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2;
246+
; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
247+
; NORMAL-NEXT: st.param.b32 [func_retval0], %r4;
248+
; NORMAL-NEXT: ret;
249+
%r = frem arcp float %a, 7.0
250+
ret float %r
251+
}
252+
define float @frem_f32_imm1_normal(float %a) {
253+
; FAST-LABEL: frem_f32_imm1_normal(
254+
; FAST: {
255+
; FAST-NEXT: .reg .b32 %r<5>;
256+
; FAST-EMPTY:
257+
; FAST-NEXT: // %bb.0:
258+
; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
259+
; FAST-NEXT: div.approx.f32 %r2, %r1, 0f40E00000;
260+
; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2;
261+
; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;
262+
; FAST-NEXT: st.param.b32 [func_retval0], %r4;
263+
; FAST-NEXT: ret;
264+
;
265+
; NORMAL-LABEL: frem_f32_imm1_normal(
266+
; NORMAL: {
267+
; NORMAL-NEXT: .reg .b32 %r<5>;
268+
; NORMAL-EMPTY:
269+
; NORMAL-NEXT: // %bb.0:
270+
; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_normal_param_0];
244271
; NORMAL-NEXT: div.rn.f32 %r2, %r1, 0f40E00000;
245272
; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2;
246273
; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1;

0 commit comments

Comments
 (0)