Skip to content

Commit ada6aa3

Browse files
committed
AMDGPU: Fold undef rcp to qnan
This matches the behavior of instcombine and the handling of fdiv.
1 parent a38db7b commit ada6aa3

File tree

3 files changed

+11
-8
lines changed

3 files changed

+11
-8
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9991,8 +9991,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
99919991
EVT VT = N->getValueType(0);
99929992
SDValue N0 = N->getOperand(0);
99939993

9994-
if (N0.isUndef())
9995-
return N0;
9994+
if (N0.isUndef()) {
9995+
return DCI.DAG.getConstantFP(
9996+
APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N),
9997+
VT);
9998+
}
99969999

999710000
if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
999810001
N0.getOpcode() == ISD::SINT_TO_FP)) {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ declare double @llvm.sqrt.f64(double) #0
77
declare float @llvm.sqrt.f32(float) #0
88

99
; FUNC-LABEL: {{^}}rcp_undef_f32:
10-
; SI-NOT: v_rcp_f32
10+
; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
11+
; SI-NOT: [[NAN]]
12+
; SI: buffer_store_dword [[NAN]]
1113
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
1214
%rcp = call float @llvm.amdgcn.rcp.f32(float undef)
1315
store float %rcp, float addrspace(1)* %out, align 4

llvm/test/CodeGen/AMDGPU/select-undef.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
1+
; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
22

33
; GCN-LABEL: {{^}}select_undef_lhs:
44
; GCN: s_waitcnt
55
; GCN-NOT: v_cmp
66
; GCN-NOT: v_cndmask
77
; GCN-NEXT: s_setpc_b64
88
define float @select_undef_lhs(float %val, i1 %cond) {
9-
%undef = call float @llvm.amdgcn.rcp.f32(float undef)
10-
%sel = select i1 %cond, float %undef, float %val
9+
%sel = select i1 %cond, float undef, float %val
1110
ret float %sel
1211
}
1312

@@ -17,8 +16,7 @@ define float @select_undef_lhs(float %val, i1 %cond) {
1716
; GCN-NOT: v_cndmask
1817
; GCN-NEXT: s_setpc_b64
1918
define float @select_undef_rhs(float %val, i1 %cond) {
20-
%undef = call float @llvm.amdgcn.rcp.f32(float undef)
21-
%sel = select i1 %cond, float %val, float %undef
19+
%sel = select i1 %cond, float %val, float undef
2220
ret float %sel
2321
}
2422

0 commit comments

Comments
 (0)