@@ -204,6 +204,8 @@ void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
204
204
case AtomicRMWInst::UMin:
205
205
case AtomicRMWInst::FAdd:
206
206
case AtomicRMWInst::FSub:
207
+ case AtomicRMWInst::FMax:
208
+ case AtomicRMWInst::FMin:
207
209
break ;
208
210
}
209
211
@@ -358,6 +360,10 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
358
360
case AtomicRMWInst::UMin:
359
361
Pred = CmpInst::ICMP_ULT;
360
362
break ;
363
+ case AtomicRMWInst::FMax:
364
+ return B.CreateMaxNum (LHS, RHS);
365
+ case AtomicRMWInst::FMin:
366
+ return B.CreateMinNum (LHS, RHS);
361
367
}
362
368
Value *Cond = B.CreateICmp (Pred, LHS, RHS);
363
369
return B.CreateSelect (Cond, LHS, RHS);
@@ -634,6 +640,10 @@ static Constant *getIdentityValueForAtomicOp(Type *const Ty,
634
640
return ConstantFP::get (C, APFloat::getZero (Ty->getFltSemantics (), true ));
635
641
case AtomicRMWInst::FSub:
636
642
return ConstantFP::get (C, APFloat::getZero (Ty->getFltSemantics (), false ));
643
+ case AtomicRMWInst::FMin:
644
+ return ConstantFP::get (C, APFloat::getInf (Ty->getFltSemantics (), false ));
645
+ case AtomicRMWInst::FMax:
646
+ return ConstantFP::get (C, APFloat::getInf (Ty->getFltSemantics (), true ));
637
647
}
638
648
}
639
649
@@ -798,6 +808,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
798
808
case AtomicRMWInst::Min:
799
809
case AtomicRMWInst::UMax:
800
810
case AtomicRMWInst::UMin:
811
+ case AtomicRMWInst::FMin:
812
+ case AtomicRMWInst::FMax:
801
813
// These operations with a uniform value are idempotent: doing the atomic
802
814
// operation multiple times has the same effect as doing it once.
803
815
NewV = V;
@@ -936,6 +948,8 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
936
948
case AtomicRMWInst::Min:
937
949
case AtomicRMWInst::UMax:
938
950
case AtomicRMWInst::UMin:
951
+ case AtomicRMWInst::FMin:
952
+ case AtomicRMWInst::FMax:
939
953
LaneOffset = B.CreateSelect (Cond, Identity, V);
940
954
break ;
941
955
case AtomicRMWInst::Xor:
0 commit comments