Skip to content

Commit 45a02d4

Browse files
committed
[Transforms][Utils][PromoteMem2Reg] Propagate nnan and ninf flags on par with the nsz flag
Following the change introduced by PR llvm#83381, this patch extends the same treatment to the nnan and ninf fast-math flags. This addresses the performance drop caused by PR#83200, which prevented a vital InstCombine transformation due to the lack of the relevant fast-math flags. The PromoteMem2Reg utility is used by the SROA pass, where PHI nodes are created. The proposed change allows the nnan and ninf flags to be propagated down to these PHI nodes.
1 parent 92af82a commit 45a02d4

File tree

3 files changed

+164
-11
lines changed

3 files changed

+164
-11
lines changed

clang/test/Headers/__clang_hip_math.hip

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,7 +1727,7 @@ extern "C" __device__ double test_j1(double x) {
17271727
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
17281728
// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3JNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP14:![0-9]+]]
17291729
// FINITEONLY: _ZL3jnfif.exit:
1730-
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
1730+
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
17311731
// FINITEONLY-NEXT: ret float [[RETVAL_0_I]]
17321732
//
17331733
// APPROX-LABEL: @test_jnf(
@@ -1830,7 +1830,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
18301830
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
18311831
// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2JNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP15:![0-9]+]]
18321832
// FINITEONLY: _ZL2jnid.exit:
1833-
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
1833+
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
18341834
// FINITEONLY-NEXT: ret double [[RETVAL_0_I]]
18351835
//
18361836
// APPROX-LABEL: @test_jn(
@@ -4461,7 +4461,7 @@ extern "C" __device__ double test_y1(double x) {
44614461
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
44624462
// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL3YNFIF_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
44634463
// FINITEONLY: _ZL3ynfif.exit:
4464-
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
4464+
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf float [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
44654465
// FINITEONLY-NEXT: ret float [[RETVAL_0_I]]
44664466
//
44674467
// APPROX-LABEL: @test_ynf(
@@ -4564,7 +4564,7 @@ extern "C" __device__ float test_ynf(int x, float y) {
45644564
// FINITEONLY-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_I]], [[X]]
45654565
// FINITEONLY-NEXT: br i1 [[EXITCOND_NOT]], label [[_ZL2YNID_EXIT]], label [[FOR_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
45664566
// FINITEONLY: _ZL2ynid.exit:
4567-
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
4567+
// FINITEONLY-NEXT: [[RETVAL_0_I:%.*]] = phi nnan ninf double [ [[CALL_I20_I]], [[IF_THEN_I]] ], [ [[CALL_I22_I]], [[IF_THEN2_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ], [ [[SUB_I]], [[FOR_BODY_I]] ]
45684568
// FINITEONLY-NEXT: ret double [[RETVAL_0_I]]
45694569
//
45704570
// APPROX-LABEL: @test_yn(

llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,12 @@ struct PromoteMem2Reg {
394394
/// Whether the function has the no-signed-zeros-fp-math attribute set.
395395
bool NoSignedZeros = false;
396396

397+
/// Whether the function has the no-nans-fp-math attribute set.
398+
bool NoNaNs = false;
399+
400+
/// Whether the function has the no-infs-fp-math attribute set.
401+
bool NoInfs = false;
402+
397403
public:
398404
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
399405
AssumptionCache *AC)
@@ -752,6 +758,8 @@ void PromoteMem2Reg::run() {
752758
ForwardIDFCalculator IDF(DT);
753759

754760
NoSignedZeros = F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool();
761+
NoNaNs = F.getFnAttribute("no-nans-fp-math").getValueAsBool();
762+
NoInfs = F.getFnAttribute("no-infs-fp-math").getValueAsBool();
755763

756764
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
757765
AllocaInst *AI = Allocas[AllocaNum];
@@ -1132,13 +1140,24 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
11321140
for (unsigned i = 0; i != NumEdges; ++i)
11331141
APN->addIncoming(IncomingVals[AllocaNo], Pred);
11341142

1135-
// For the sequence `return X > 0.0 ? X : -X`, it is expected that this
1136-
// results in fabs intrinsic. However, without no-signed-zeros(nsz) flag
1137-
// on the phi node generated at this stage, fabs folding does not
1138-
// happen. So, we try to infer nsz flag from the function attributes to
1139-
// enable this fabs folding.
1140-
if (isa<FPMathOperator>(APN) && NoSignedZeros)
1141-
APN->setHasNoSignedZeros(true);
1143+
if (isa<FPMathOperator>(APN)) {
1144+
// For the sequence `return X > 0.0 ? X : -X`, it is expected that
1145+
// this results in fabs intrinsic. However, without
1146+
// no-signed-zeros(nsz) flag on the phi node generated at this stage,
1147+
// fabs folding does not happen. So, we try to infer nsz flag from the
1148+
// function attributes to enable this fabs folding.
1149+
if (NoSignedZeros)
1150+
APN->setHasNoSignedZeros(true);
1151+
1152+
// This allows select instruction folding relevant to floating point
1153+
// reductions whose operand is a PHI.
1154+
if (NoNaNs)
1155+
APN->setHasNoNaNs(true);
1156+
1157+
// Handle NoInfs flag too.
1158+
if (NoInfs)
1159+
APN->setHasNoInfs(true);
1160+
}
11421161

11431162
// The currently active variable for this block is now the PHI.
11441163
IncomingVals[AllocaNo] = APN;

llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,137 @@ return: ; preds = %entry,%if.then
7777
%retval = load double, ptr %x.addr
7878
ret double %retval
7979
}
80+
81+
define double @phi_with_nnan(double %x) "no-nans-fp-math"="true" {
82+
; CHECK-LABEL: define double @phi_with_nnan(
83+
; CHECK-SAME: double [[X:%.*]]) #[[ATTR2:[0-9]+]] {
84+
; CHECK-NEXT: entry:
85+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
86+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
87+
; CHECK: if.then:
88+
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
89+
; CHECK-NEXT: br label [[RETURN]]
90+
; CHECK: return:
91+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
92+
; CHECK-NEXT: ret double [[X_ADDR_0]]
93+
;
94+
entry:
95+
%x.addr = alloca double
96+
%cmp = fcmp olt double %x, 0.0
97+
br i1 %cmp, label %if.then, label %return
98+
99+
if.then: ; preds = %entry
100+
%fneg = fneg double %x
101+
store double %fneg, ptr %x.addr
102+
br label %return
103+
104+
return: ; preds = %entry,%if.then
105+
%retval = load double, ptr %x.addr
106+
ret double %retval
107+
}
108+
109+
define <2 x double> @vector_phi_with_nnan(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-nans-fp-math"="true" {
110+
; CHECK-LABEL: define <2 x double> @vector_phi_with_nnan(
111+
; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR2]] {
112+
; CHECK-NEXT: entry:
113+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
114+
; CHECK: if.then:
115+
; CHECK-NEXT: br label [[RETURN]]
116+
; CHECK: return:
117+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nnan <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
118+
; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
119+
;
120+
entry:
121+
%x.addr = alloca <2 x double>
122+
store <2 x double> %a, ptr %x.addr
123+
br i1 %cmp, label %if.then, label %return
124+
125+
if.then: ; preds = %entry
126+
store <2 x double> %b, ptr %x.addr
127+
br label %return
128+
129+
return: ; preds = %entry,%if.then
130+
%retval = load <2 x double>, ptr %x.addr
131+
ret <2 x double> %retval
132+
}
133+
134+
define double @phi_without_nnan(double %x) "no-nans-fp-math"="false" {
135+
; CHECK-LABEL: define double @phi_without_nnan(
136+
; CHECK-SAME: double [[X:%.*]]) #[[ATTR3:[0-9]+]] {
137+
; CHECK-NEXT: entry:
138+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
139+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
140+
; CHECK: if.then:
141+
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
142+
; CHECK-NEXT: br label [[RETURN]]
143+
; CHECK: return:
144+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
145+
; CHECK-NEXT: ret double [[X_ADDR_0]]
146+
;
147+
entry:
148+
%x.addr = alloca double
149+
%cmp = fcmp olt double %x, 0.0
150+
br i1 %cmp, label %if.then, label %return
151+
152+
if.then: ; preds = %entry
153+
%fneg = fneg double %x
154+
store double %fneg, ptr %x.addr
155+
br label %return
156+
157+
return: ; preds = %entry,%if.then
158+
%retval = load double, ptr %x.addr
159+
ret double %retval
160+
}
161+
162+
define <2 x double> @vector_phi_with_ninf(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-infs-fp-math"="true" {
163+
; CHECK-LABEL: define <2 x double> @vector_phi_with_ninf(
164+
; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR4:[0-9]+]] {
165+
; CHECK-NEXT: entry:
166+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
167+
; CHECK: if.then:
168+
; CHECK-NEXT: br label [[RETURN]]
169+
; CHECK: return:
170+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi ninf <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
171+
; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
172+
;
173+
entry:
174+
%x.addr = alloca <2 x double>
175+
store <2 x double> %a, ptr %x.addr
176+
br i1 %cmp, label %if.then, label %return
177+
178+
if.then: ; preds = %entry
179+
store <2 x double> %b, ptr %x.addr
180+
br label %return
181+
182+
return: ; preds = %entry,%if.then
183+
%retval = load <2 x double>, ptr %x.addr
184+
ret <2 x double> %retval
185+
}
186+
187+
define double @phi_without_ninf(double %x) "no-infs-fp-math"="false" {
188+
; CHECK-LABEL: define double @phi_without_ninf(
189+
; CHECK-SAME: double [[X:%.*]]) #[[ATTR5:[0-9]+]] {
190+
; CHECK-NEXT: entry:
191+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
192+
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
193+
; CHECK: if.then:
194+
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
195+
; CHECK-NEXT: br label [[RETURN]]
196+
; CHECK: return:
197+
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
198+
; CHECK-NEXT: ret double [[X_ADDR_0]]
199+
;
200+
entry:
201+
%x.addr = alloca double
202+
%cmp = fcmp olt double %x, 0.0
203+
br i1 %cmp, label %if.then, label %return
204+
205+
if.then: ; preds = %entry
206+
%fneg = fneg double %x
207+
store double %fneg, ptr %x.addr
208+
br label %return
209+
210+
return: ; preds = %entry,%if.then
211+
%retval = load double, ptr %x.addr
212+
ret double %retval
213+
}

0 commit comments

Comments
 (0)