Skip to content

Commit 143cce7

Browse files
authored
[X86][TargetLowering] Avoid deleting temporary nodes in getNegatedExpression (#139029)
In the original case, the third call to `getCheaperNegatedExpression` deletes the SDNode returned by the first call. Similar to 74e6030, this patch uses `HandleSDNode`s to prevent nodes from being deleted by subsequent calls. Closes #138944.
1 parent a230bb0 commit 143cce7

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54726,12 +54726,19 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
5472654726
if (!Flags.hasNoSignedZeros())
5472754727
break;
5472854728

54729+
// Because getCheaperNegatedExpression can delete nodes we need a handle to
54730+
// keep temporary nodes alive.
54731+
std::list<HandleSDNode> Handles;
54732+
5472954733
// This is always negatible for free but we might be able to remove some
5473054734
// extra operand negations as well.
5473154735
SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
54732-
for (int i = 0; i != 3; ++i)
54736+
for (int i = 0; i != 3; ++i) {
5473354737
NewOps[i] = getCheaperNegatedExpression(
5473454738
Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);
54739+
if (!!NewOps[i])
54740+
Handles.emplace_back(NewOps[i]);
54741+
}
5473554742

5473654743
bool NegA = !!NewOps[0];
5473754744
bool NegB = !!NewOps[1];

llvm/test/CodeGen/X86/pr138982.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 -mattr=+fma | FileCheck %s
3+
4+
define <4 x float> @pr138982(<4 x float> %in_vec) {
5+
; CHECK-LABEL: pr138982:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
8+
; CHECK-NEXT: vrcpps %xmm0, %xmm2
9+
; CHECK-NEXT: vrcpps %xmm1, %xmm1
10+
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
11+
; CHECK-NEXT: vcmpneqps %xmm0, %xmm3, %xmm0
12+
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
13+
; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm4, %xmm0
14+
; CHECK-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm3 * xmm2) + xmm0
15+
; CHECK-NEXT: retq
16+
entry:
17+
%fneg = fneg <4 x float> %in_vec
18+
%rcp = tail call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %fneg)
19+
%cmp = fcmp une <4 x float> zeroinitializer, %in_vec
20+
%sel = select <4 x i1> %cmp, <4 x float> %rcp, <4 x float> splat (float 1.000000e+00)
21+
%fma = call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %rcp, <4 x float> zeroinitializer, <4 x float> %sel)
22+
ret <4 x float> %fma
23+
}

0 commit comments

Comments
 (0)