Skip to content

Commit 102dfa8

Browse files
authored
[DAGCombiner] Allow freeze to sink through fmul by adding it to AllowMultipleMaybePoisonOperands (#142250)
Allow freeze to sink through fmul: once its poison-generating flags are dropped, fmul does not produce poison unless one of its operands is poison. Adding `ISD::FMUL` to `AllowMultipleMaybePoisonOperands` lets the DAG combiner push freeze through an fmul even when more than one operand may be poison. This helps expose patterns like `fmul+fadd` for `FMA` fusion. When rebuilding the node, we drop nnan, ninf, and nsz (nnan and ninf can produce poison; nsz is not needed for the fold), but keep contract, reassoc, afn, and arcp, since those flags do not introduce poison. Closes: #141622
1 parent 5dfb7bb commit 102dfa8

File tree

2 files changed

+49
-16
lines changed

2 files changed

+49
-16
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16392,12 +16392,11 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1639216392
return SDValue();
1639316393

1639416394
bool AllowMultipleMaybePoisonOperands =
16395-
N0.getOpcode() == ISD::SELECT_CC ||
16396-
N0.getOpcode() == ISD::SETCC ||
16395+
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
1639716396
N0.getOpcode() == ISD::BUILD_VECTOR ||
1639816397
N0.getOpcode() == ISD::BUILD_PAIR ||
1639916398
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16400-
N0.getOpcode() == ISD::CONCAT_VECTORS;
16399+
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
1640116400

1640216401
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
1640316402
// ones" or "constant" into something that depends on FrozenUndef. We can
@@ -16495,7 +16494,17 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1649516494
SVN->getMask());
1649616495
} else {
1649716496
// NOTE: this strips poison generating flags.
16498-
R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
16497+
// Folding freeze(op(x, ...)) -> op(freeze(x), ...) does not require nnan,
16498+
// ninf, nsz, or fast.
16499+
// However, contract, reassoc, afn, and arcp should be preserved,
16500+
// as these fast-math flags do not introduce poison values.
16501+
SDNodeFlags SrcFlags = N0->getFlags();
16502+
SDNodeFlags SafeFlags;
16503+
SafeFlags.setAllowContract(SrcFlags.hasAllowContract());
16504+
SafeFlags.setAllowReassociation(SrcFlags.hasAllowReassociation());
16505+
SafeFlags.setApproximateFuncs(SrcFlags.hasApproximateFuncs());
16506+
SafeFlags.setAllowReciprocal(SrcFlags.hasAllowReciprocal());
16507+
R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops, SafeFlags);
1649916508
}
1650016509
assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
1650116510
"Can't create node that may be undef/poison!");

llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) {
1717
; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
1818
; CHECK: ; %bb.0:
1919
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
21-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
22-
; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
20+
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
2321
; CHECK-NEXT: s_setpc_b64 s[30:31]
2422
%mul = fmul nnan contract afn float %x, %y
2523
%mul.fr = freeze float %mul
@@ -43,9 +41,7 @@ define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) {
4341
; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
4442
; CHECK: ; %bb.0:
4543
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
47-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
48-
; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
44+
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
4945
; CHECK-NEXT: s_setpc_b64 s[30:31]
5046
%mul = fmul nnan contract float %x, %y
5147
%mul.fr = freeze float %mul
@@ -69,9 +65,7 @@ define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) {
6965
; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
7066
; CHECK: ; %bb.0:
7167
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
73-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
74-
; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0
68+
; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
7569
; CHECK-NEXT: s_setpc_b64 s[30:31]
7670
%mul = fmul nnan contract float %x, %y
7771
%mul.fr = freeze float %mul
@@ -95,12 +89,42 @@ define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) {
9589
; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
9690
; CHECK: ; %bb.0:
9791
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98-
; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
99-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
100-
; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
92+
; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
10193
; CHECK-NEXT: s_setpc_b64 s[30:31]
10294
%mul = fmul nnan contract float %x, %y
10395
%mul.fr = freeze float %mul
10496
%sub = fsub nnan contract float 1.000000e+00, %mul.fr
10597
ret float %sub
10698
}
99+
100+
define float @fma_freeze_sink_multiple_maybe_poison_nnan_add(float %x, float %y) {
101+
; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_add:
102+
; CHECK: ; %bb.0:
103+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104+
; CHECK-NEXT: v_dual_subrev_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, 1.0, v1
105+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
106+
; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
107+
; CHECK-NEXT: s_setpc_b64 s[30:31]
108+
%fsub_x = fsub nnan contract float %x, 1.000000e+00
109+
%fadd_y = fadd nnan contract float %y, 1.000000e+00
110+
%mul = fmul nnan contract float %fsub_x, %fadd_y
111+
%mul.fr = freeze float %mul
112+
%add = fadd nnan contract float %mul.fr, 1.000000e+00
113+
ret float %add
114+
}
115+
116+
define float @fma_freeze_sink_multiple_maybe_poison_nnan_sub(float %x, float %y) {
117+
; CHECK-LABEL: fma_freeze_sink_multiple_maybe_poison_nnan_sub:
118+
; CHECK: ; %bb.0:
119+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; CHECK-NEXT: v_dual_add_f32 v0, 1.0, v0 :: v_dual_add_f32 v1, -1.0, v1
121+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
122+
; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
123+
; CHECK-NEXT: s_setpc_b64 s[30:31]
124+
%fadd_x = fadd nnan contract float %x, 1.000000e+00
125+
%fsub_y = fsub nnan contract float %y, 1.000000e+00
126+
%mul = fmul nnan contract float %fadd_x, %fsub_y
127+
%mul.fr = freeze float %mul
128+
%sub = fsub nnan contract float %mul.fr, 1.000000e+00
129+
ret float %sub
130+
}

0 commit comments

Comments
 (0)