Skip to content

Commit 6e3d2a4

Browse files
authored
[ISel] Fix another crash in new FMA DAG combine (#67818)
Following on from D135150, this patch fixes another crash caused by this DAG combine: fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) The combine calls ReplaceAllUsesOfValueWith to replace (fmul C, D) with (fma C, D, E). This can cause nodes to get CSEd. In D135150 the problem was that the (fma C, D, E) node got CSEd away. In this new case, the problem is that the outer fadd node gets CSEd away. To fix it we have to return SDValue(N, 0) from the combine and be careful not to add a deleted node to the worklist.
1 parent 8ec50d6 commit 6e3d2a4

File tree

2 files changed

+42
-3
lines changed

2 files changed

+42
-3
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15496,7 +15496,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
1549615496
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
1549715497
// Replacing the inner FMul could cause the outer FMA to be simplified
1549815498
// away.
15499-
return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA;
15499+
return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
1550015500
}
1550115501

1550215502
TmpFMA = TmpFMA->getOperand(2);
@@ -16054,7 +16054,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
1605416054

1605516055
// FADD -> FMA combines:
1605616056
if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16057-
AddToWorklist(Fused.getNode());
16057+
if (Fused.getOpcode() != ISD::DELETED_NODE)
16058+
AddToWorklist(Fused.getNode());
1605816059
return Fused;
1605916060
}
1606016061
return SDValue();
@@ -16246,7 +16247,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
1624616247

1624716248
// FADD -> FMA combines:
1624816249
if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16249-
AddToWorklist(Fused.getNode());
16250+
if (Fused.getOpcode() != ISD::DELETED_NODE)
16251+
AddToWorklist(Fused.getNode());
1625016252
return Fused;
1625116253
}
1625216254
return SDValue();

llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,40 @@ bb17:
7878
%i19 = phi float [ %i12, %bb11 ], [ 0.000000e+00, %bb15 ]
7979
ret void
8080
}
81+
82+
define float @test2(float %arg, float %arg1) {
83+
; CHECK-LABEL: name: test2
84+
; CHECK: bb.0.bb:
85+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
86+
; CHECK-NEXT: {{ $}}
87+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
88+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
89+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1082130432, implicit $exec
90+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1120534528
91+
; CHECK-NEXT: [[V_FMAC_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY]], 0, killed [[S_MOV_B32_]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
92+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
93+
; CHECK-NEXT: [[V_FMAC_F32_e64_1:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_FMAC_F32_e64 0, [[COPY1]], 0, killed [[S_MOV_B32_1]], 0, [[V_FMAC_F32_e64_]], 0, 0, implicit $mode, implicit $exec
94+
; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nsz contract reassoc nofpexcept V_ADD_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
95+
; CHECK-NEXT: [[V_RCP_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, [[V_FMAC_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
96+
; CHECK-NEXT: [[V_RCP_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, 0, implicit $mode, implicit $exec
97+
; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[V_RCP_F32_e64_1]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
98+
; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, killed [[V_MUL_F32_e64_]], 0, [[V_RCP_F32_e64_1]], 0, 0, implicit $mode, implicit $exec
99+
; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_MAX_F32_e64_]], 0, killed [[V_RCP_F32_e64_]], 0, 0, implicit $mode, implicit $exec
100+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_1]]
101+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
102+
bb:
103+
%i = fmul contract float %arg1, 1.000000e+02
104+
%i2 = fmul contract float %arg, 0.000000e+00
105+
%i3 = fadd reassoc nsz contract float %i, %arg1
106+
%i4 = fadd nsz contract float %i3, %i2
107+
%i5 = fsub reassoc nsz contract float 1.000000e+00, %i4
108+
%i6 = fdiv afn float 1.000000e+00, %i5
109+
%i7 = fneg float %arg
110+
%i9 = fmul float %i6, %i7
111+
%i10 = fmul float %i6, -1.000000e+00
112+
%i13 = call float @llvm.maxnum.f32(float %i9, float %i10)
113+
%ret = fadd float %i10, %i13
114+
ret float %ret
115+
}
116+
117+
declare float @llvm.maxnum.f32(float, float)

0 commit comments

Comments
 (0)