AMDGPU: Fix fcanonicalize constant folding not correctly handling -0.0

arsenm · arsenm · commit fe56afc4d74e · 2022-11-18T10:03:29.000-08:00
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10284,8 +10284,10 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
 SDValue SITargetLowering::getCanonicalConstantFP(
   SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
   // Flush denormals to 0 if not enabled.
-  if (C.isDenormal() && !denormalsEnabledForType(DAG, VT))
-    return DAG.getConstantFP(0.0, SL, VT);
+  if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) {
+    return DAG.getConstantFP(APFloat::getZero(C.getSemantics(),
+                                              C.isNegative()), SL, VT);
+  }
 
   if (C.isNaN()) {
     APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -148,7 +148,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float
 }
 
 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
@@ -353,7 +353,7 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double
 
 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f64:
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
-; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
+; GCN: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))