Skip to content

Commit c78c3c6

Browse files
committed
AMDGPU: Handle other fmin flavors in fract combine
Since the input is either known not to be a NaN, or the code using the result explicitly checks whether the input is a NaN, any of the three fmin variants (minnum, minimumnum, minimum) is valid to match.
1 parent 825f96c commit c78c3c6

File tree

2 files changed

+230
-278
lines changed

2 files changed

+230
-278
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ class AMDGPUCodeGenPrepareImpl
327327

328328
bool visitIntrinsicInst(IntrinsicInst &I);
329329
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
330-
bool visitMinNum(IntrinsicInst &I);
330+
bool visitFMinLike(IntrinsicInst &I);
331331
bool visitSqrt(IntrinsicInst &I);
332332
bool run();
333333
};
@@ -2197,7 +2197,9 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
21972197
case Intrinsic::bitreverse:
21982198
return visitBitreverseIntrinsicInst(I);
21992199
case Intrinsic::minnum:
2200-
return visitMinNum(I);
2200+
case Intrinsic::minimumnum:
2201+
case Intrinsic::minimum:
2202+
return visitFMinLike(I);
22012203
case Intrinsic::sqrt:
22022204
return visitSqrt(I);
22032205
default:
@@ -2216,15 +2218,22 @@ bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
22162218
}
22172219

22182220
/// Match non-nan fract pattern.
2219-
/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0)
2221+
/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
2222+
/// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
2223+
/// minimum(fsub(x, floor(x)), nextafter(1.0, -1.0))
22202224
///
22212225
/// If fract is a useful instruction for the subtarget. Does not account for the
22222226
/// nan handling; the instruction has a nan check on the input value.
22232227
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
22242228
if (ST.hasFractBug())
22252229
return nullptr;
22262230

2227-
if (I.getIntrinsicID() != Intrinsic::minnum)
2231+
Intrinsic::ID IID = I.getIntrinsicID();
2232+
2233+
// The value is only used in contexts where we know the input isn't a nan, so
2234+
// any of the fmin variants are fine.
2235+
if (IID != Intrinsic::minnum &&
2236+
IID != Intrinsic::minimumnum && IID != Intrinsic::minimum)
22282237
return nullptr;
22292238

22302239
Type *Ty = I.getType();
@@ -2270,7 +2279,7 @@ Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
22702279
return insertValues(Builder, FractArg->getType(), ResultVals);
22712280
}
22722281

2273-
bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
2282+
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
22742283
Value *FractArg = matchFractPat(I);
22752284
if (!FractArg)
22762285
return false;

0 commit comments

Comments
 (0)