Skip to content

Commit 2cacf46

Browse files
dtcxzyw authored and tstellar committed
[X86][TargetLowering] Avoid deleting temporary nodes in getNegatedExpression (llvm#139029)
In the original case, the third call to `getCheaperNegatedExpression` deletes the SDNode returned by the first call. Similar to 74e6030, this patch uses `HandleSDNodes` to prevent nodes from being deleted by subsequent calls. Closes llvm#138944. (cherry picked from commit 143cce7)
1 parent f233430 commit 2cacf46

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54147,12 +54147,19 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
54147 54147
if (!Flags.hasNoSignedZeros())
54148 54148
break;
54149 54149

54150+
// Because getCheaperNegatedExpression can delete nodes we need a handle to
54151+
// keep temporary nodes alive.
54152+
std::list<HandleSDNode> Handles;
54153+
54150 54154
// This is always negatible for free but we might be able to remove some
54151 54155
// extra operand negations as well.
54152 54156
SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
54153-
for (int i = 0; i != 3; ++i)
54157+
for (int i = 0; i != 3; ++i) {
54154 54158
NewOps[i] = getCheaperNegatedExpression(
54155 54159
Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);
54160+
if (!!NewOps[i])
54161+
Handles.emplace_back(NewOps[i]);
54162+
}
54156 54163

54157 54164
bool NegA = !!NewOps[0];
54158 54165
bool NegB = !!NewOps[1];

llvm/test/CodeGen/X86/pr138982.ll

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 -mattr=+fma | FileCheck %s
3+
4+
define <4 x float> @pr138982(<4 x float> %in_vec) {
5+
; CHECK-LABEL: pr138982:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
8+
; CHECK-NEXT: vrcpps %xmm0, %xmm2
9+
; CHECK-NEXT: vrcpps %xmm1, %xmm1
10+
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
11+
; CHECK-NEXT: vcmpneqps %xmm0, %xmm3, %xmm0
12+
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
13+
; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm4, %xmm0
14+
; CHECK-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm3 * xmm2) + xmm0
15+
; CHECK-NEXT: retq
16+
entry:
17+
%fneg = fneg <4 x float> %in_vec
18+
%rcp = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %fneg)
19+
%cmp = fcmp une <4 x float> zeroinitializer, %in_vec
20+
%sel = select <4 x i1> %cmp, <4 x float> %rcp, <4 x float> splat (float 1.000000e+00)
21+
%fma = call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %rcp, <4 x float> zeroinitializer, <4 x float> %sel)
22+
ret <4 x float> %fma
23+
}

0 commit comments

Comments
 (0)