AMDGPU: Fold undef rcp to qnan

arsenm · arsenm · commit ada6aa3f5c96 · 2022-11-04T15:49:37.000-07:00
This matches the behavior in instcombine, and for fdiv.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9991,8 +9991,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
 
-  if (N0.isUndef())
-    return N0;
+  if (N0.isUndef()) {
+    return DCI.DAG.getConstantFP(
+        APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N),
+        VT);
+  }
 
   if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
                          N0.getOpcode() == ISD::SINT_TO_FP)) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -7,7 +7,9 @@ declare double @llvm.sqrt.f64(double) #0
 declare float @llvm.sqrt.f32(float) #0
 
 ; FUNC-LABEL: {{^}}rcp_undef_f32:
-; SI-NOT: v_rcp_f32
+; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
+; SI-NOT: [[NAN]]
+; SI: buffer_store_dword [[NAN]]
 define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
   %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
   store float %rcp, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll
@@ -1,13 +1,12 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}select_undef_lhs:
 ; GCN: s_waitcnt
 ; GCN-NOT: v_cmp
 ; GCN-NOT: v_cndmask
 ; GCN-NEXT: s_setpc_b64
 define float @select_undef_lhs(float %val, i1 %cond) {
-  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
-  %sel = select i1 %cond, float %undef, float %val
+  %sel = select i1 %cond, float undef, float %val
   ret float %sel
 }
 
@@ -17,8 +16,7 @@ define float @select_undef_lhs(float %val, i1 %cond) {
 ; GCN-NOT: v_cndmask
 ; GCN-NEXT: s_setpc_b64
 define float @select_undef_rhs(float %val, i1 %cond) {
-  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
-  %sel = select i1 %cond, float %val, float %undef
+  %sel = select i1 %cond, float %val, float undef
   ret float %sel
 }