Skip to content

Commit 67fbdf1

Browse files
committed
[SROA] Propagate no-signed-zeros(nsz) fast-math flag on the phi node using function attribute
It is expected that the sequence `return X > 0.0 ? X : -X`, compiled with -Ofast, produces the fabs intrinsic. However, LLVM is currently unable to do so. The sequence goes through the following transformations in the pass pipeline: the SROA pass generates a phi node. Unlike the clang frontend, which typically sets fast-math flags on the instructions it emits, SROA does not infer any fast-math flags on that phi node. The phi node is eventually translated into a select instruction. Because the no-signed-zeros (nsz) fast-math flag is missing on the select instruction, the InstCombine pass fails to fold the sequence into the fabs intrinsic. This patch, as part of SROA, propagates the nsz fast-math flag onto the phi node based on the "no-signed-zeros-fp-math" function attribute, which enables this folding. Co-authored-by: Sushant Gokhale <[email protected]>
1 parent 7e9b949 commit 67fbdf1

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/IR/Intrinsics.h"
4242
#include "llvm/IR/LLVMContext.h"
4343
#include "llvm/IR/Module.h"
44+
#include "llvm/IR/Operator.h"
4445
#include "llvm/IR/Type.h"
4546
#include "llvm/IR/User.h"
4647
#include "llvm/Support/Casting.h"
@@ -1113,6 +1114,17 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
11131114
for (unsigned i = 0; i != NumEdges; ++i)
11141115
APN->addIncoming(IncomingVals[AllocaNo], Pred);
11151116

1117+
// For the sequence `return X > 0.0 ? X : -X`, it is expected that this
1118+
// results in fabs intrinsic. However, without no-signed-zeros(nsz) flag
1119+
// on the phi node generated at this stage, fabs folding does not
1120+
// happen. So, we try to infer nsz flag from the function attributes to
1121+
// enable this fabs folding.
1122+
if (APN->isComplete() && isa<FPMathOperator>(APN) &&
1123+
BB->getParent()
1124+
->getFnAttribute("no-signed-zeros-fp-math")
1125+
.getValueAsBool())
1126+
APN->setHasNoSignedZeros(true);
1127+
11161128
// The currently active variable for this block is now the PHI.
11171129
IncomingVals[AllocaNo] = APN;
11181130
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes='sroa' -S | FileCheck %s
3+
define double @phi_with_nsz(double %x) "no-signed-zeros-fp-math"="true" {
4+
; CHECK-LABEL: define double @phi_with_nsz(
5+
; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
8+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
9+
; CHECK: if.then:
10+
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
11+
; CHECK-NEXT: br label [[RETURN]]
12+
; CHECK: return:
13+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
14+
; CHECK-NEXT: ret double [[X_ADDR_0]]
15+
entry:
16+
%x.addr = alloca double
17+
%cmp = fcmp olt double %x, 0.0
18+
br i1 %cmp, label %if.then, label %return
19+
20+
if.then: ; preds = %entry
21+
%fneg = fneg double %x
22+
store double %fneg, ptr %x.addr
23+
br label %return
24+
25+
return: ; preds = %entry,%if.then
26+
%retval = load double, ptr %x.addr
27+
ret double %retval
28+
}
29+
30+
define <2 x double> @vector_phi_with_nsz(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-signed-zeros-fp-math"="true" {
31+
; CHECK-LABEL: define <2 x double> @vector_phi_with_nsz(
32+
; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
33+
; CHECK-NEXT: entry:
34+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
35+
; CHECK: if.then:
36+
; CHECK-NEXT: br label [[RETURN]]
37+
; CHECK: return:
38+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
39+
; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
40+
entry:
41+
%x.addr = alloca <2 x double>
42+
store <2 x double> %a, ptr %x.addr
43+
br i1 %cmp, label %if.then, label %return
44+
45+
if.then: ; preds = %entry
46+
store <2 x double> %b, ptr %x.addr
47+
br label %return
48+
49+
return: ; preds = %entry,%if.then
50+
%retval = load <2 x double>, ptr %x.addr
51+
ret <2 x double> %retval
52+
}
53+
54+
define double @phi_without_nsz(double %x) "no-signed-zeros-fp-math"="false" {
55+
; CHECK-LABEL: define double @phi_without_nsz(
56+
; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] {
57+
; CHECK-NEXT: entry:
58+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
59+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
60+
; CHECK: if.then:
61+
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
62+
; CHECK-NEXT: br label [[RETURN]]
63+
; CHECK: return:
64+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
65+
; CHECK-NEXT: ret double [[X_ADDR_0]]
66+
entry:
67+
%x.addr = alloca double
68+
%cmp = fcmp olt double %x, 0.0
69+
br i1 %cmp, label %if.then, label %return
70+
71+
if.then: ; preds = %entry
72+
%fneg = fneg double %x
73+
store double %fneg, ptr %x.addr
74+
br label %return
75+
76+
return: ; preds = %entry,%if.then
77+
%retval = load double, ptr %x.addr
78+
ret double %retval
79+
}

0 commit comments

Comments
 (0)