Skip to content

Commit 1f464fc

Browse files
committed
[AMDGPU] Support FMin/FMax in AMDGPUAtomicOptimizer.
Reviewed By: arsenm, #amdgpu
Differential Revision: https://reviews.llvm.org/D157388
Change-Id: Ifced32670835e7aeaa43872cc81280e449d0a283
1 parent 30a3adf commit 1f464fc

File tree

5 files changed

+4751
-1719
lines changed

5 files changed

+4751
-1719
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,8 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
204204
case AtomicRMWInst::UMin:
205205
case AtomicRMWInst::FAdd:
206206
case AtomicRMWInst::FSub:
207+
case AtomicRMWInst::FMax:
208+
case AtomicRMWInst::FMin:
207209
break;
208210
}
209211

@@ -358,6 +360,10 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
358360
case AtomicRMWInst::UMin:
359361
Pred = CmpInst::ICMP_ULT;
360362
break;
363+
case AtomicRMWInst::FMax:
364+
return B.CreateMaxNum(LHS, RHS);
365+
case AtomicRMWInst::FMin:
366+
return B.CreateMinNum(LHS, RHS);
361367
}
362368
Value *Cond = B.CreateICmp(Pred, LHS, RHS);
363369
return B.CreateSelect(Cond, LHS, RHS);
@@ -634,6 +640,10 @@ static Constant *getIdentityValueForAtomicOp(Type *const Ty,
634640
return ConstantFP::get(C, APFloat::getZero(Ty->getFltSemantics(), true));
635641
case AtomicRMWInst::FSub:
636642
return ConstantFP::get(C, APFloat::getZero(Ty->getFltSemantics(), false));
643+
case AtomicRMWInst::FMin:
644+
return ConstantFP::get(C, APFloat::getInf(Ty->getFltSemantics(), false));
645+
case AtomicRMWInst::FMax:
646+
return ConstantFP::get(C, APFloat::getInf(Ty->getFltSemantics(), true));
637647
}
638648
}
639649

@@ -798,6 +808,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
798808
case AtomicRMWInst::Min:
799809
case AtomicRMWInst::UMax:
800810
case AtomicRMWInst::UMin:
811+
case AtomicRMWInst::FMin:
812+
case AtomicRMWInst::FMax:
801813
// These operations with a uniform value are idempotent: doing the atomic
802814
// operation multiple times has the same effect as doing it once.
803815
NewV = V;
@@ -936,6 +948,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
936948
case AtomicRMWInst::Min:
937949
case AtomicRMWInst::UMax:
938950
case AtomicRMWInst::UMin:
951+
case AtomicRMWInst::FMin:
952+
case AtomicRMWInst::FMax:
939953
LaneOffset = B.CreateSelect(Cond, Identity, V);
940954
break;
941955
case AtomicRMWInst::Xor:

0 commit comments

Comments
 (0)