Skip to content

Commit b1cf24a

Browse files
committed
AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions
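In a strictfp function, rootn(x, 1) cannot simply be replaced by its input; the fold would need to insert a constrained canonicalize instead. That is not implemented yet (left as a TODO), so for now the fold is skipped and the call is left in place.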
1 parent bf8e647 commit b1cf24a

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,14 +1163,19 @@ bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
11631163
if (!match(opr1, m_APIntAllowPoison(CINT)))
11641164
return false;
11651165

1166+
Function *Parent = B.GetInsertBlock()->getParent();
1167+
11661168
int ci_opr1 = (int)CINT->getSExtValue();
1167-
if (ci_opr1 == 1) { // rootn(x, 1) = x
1168-
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
1169+
if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1170+
// rootn(x, 1) = x
1171+
//
1172+
// TODO: Insert constrained canonicalize for strictfp case.
1173+
LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
11691174
replaceCall(FPOp, opr0);
11701175
return true;
11711176
}
11721177

1173-
Module *M = B.GetInsertBlock()->getModule();
1178+
Module *M = Parent->getParent();
11741179
if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
11751180
if (FunctionCallee FPExpr =
11761181
getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
511511
; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
512512
; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
513513
; CHECK-NEXT: entry:
514-
; CHECK-NEXT: ret float [[X]]
514+
; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
515+
; CHECK-NEXT: ret float [[CALL]]
515516
;
516517
entry:
517518
%call = tail call float @_Z5rootnfi(float %x, i32 1) #1
@@ -533,7 +534,8 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
533534
; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
534535
; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
535536
; CHECK-NEXT: entry:
536-
; CHECK-NEXT: ret <2 x float> [[X]]
537+
; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]]
538+
; CHECK-NEXT: ret <2 x float> [[CALL]]
537539
;
538540
entry:
539541
%call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>) #1

0 commit comments

Comments
 (0)