AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions

arsenm · arsenm · commit b1cf24a08d3e · 2024-05-17T22:20:48.000+02:00
We need to insert a constrained canonicalize.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1163,14 +1163,19 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
   if (!match(opr1, m_APIntAllowPoison(CINT)))
     return false;
 
+  Function *Parent = B.GetInsertBlock()->getParent();
+
   int ci_opr1 = (int)CINT->getSExtValue();
-  if (ci_opr1 == 1) {  // rootn(x, 1) = x
-    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+  if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
+    // rootn(x, 1) = x
+    //
+    // TODO: Insert constrained canonicalize for strictfp case.
+    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
     replaceCall(FPOp, opr0);
     return true;
   }
 
-  Module *M = B.GetInsertBlock()->getModule();
+  Module *M = Parent->getParent();
   if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
     if (FunctionCallee FPExpr =
             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -511,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
 ; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
 ; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret float [[X]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
+; CHECK-NEXT:    ret float [[CALL]]
 ;
 entry:
   %call = tail call float @_Z5rootnfi(float %x, i32 1) #1
@@ -533,7 +534,8 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <2 x float> [[X]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]]
+; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1