Skip to content

Commit a686b6b

Browse files
committed
Use different condition for controlling frem expansion
1 parent b93c39d commit a686b6b

File tree

4 files changed

+38
-24
lines changed

4 files changed

+38
-24
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5681,10 +5681,6 @@ class TargetLowering : public TargetLoweringBase {
56815681
LoadSDNode *OriginalLoad,
56825682
SelectionDAG &DAG) const;
56835683

5684-
/// Indicates whether the FRem instruction should be expanded before
5685-
/// ISel in the LLVM IR.
5686-
virtual bool shouldExpandFRemInIR() const { return false; };
5687-
56885684
private:
56895685
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
56905686
const SDLoc &DL, DAGCombinerInfo &DCI) const;

llvm/lib/CodeGen/ExpandFp.cpp

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Analysis/GlobalsModRef.h"
2020
#include "llvm/Analysis/SimplifyQuery.h"
2121
#include "llvm/Analysis/ValueTracking.h"
22+
#include "llvm/CodeGen/ISDOpcodes.h"
2223
#include "llvm/CodeGen/Passes.h"
2324
#include "llvm/CodeGen/TargetLowering.h"
2425
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -27,9 +28,11 @@
2728
#include "llvm/IR/InstIterator.h"
2829
#include "llvm/IR/PassManager.h"
2930
#include "llvm/IR/Module.h"
31+
#include "llvm/IR/RuntimeLibcalls.h"
3032
#include "llvm/InitializePasses.h"
3133
#include "llvm/Pass.h"
3234
#include "llvm/Support/CommandLine.h"
35+
#include "llvm/Support/ErrorHandling.h"
3336
#include "llvm/Target/TargetMachine.h"
3437
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3538

@@ -973,7 +976,39 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
973976
I->eraseFromParent();
974977
}
975978

976-
static bool runImpl(Function &F, const TargetLowering &TLI) {
979+
// This covers all floating point types; more than we need here.
980+
// TODO Move somewhere else for general use?
981+
/// Return the Libcall for a frem instruction of
982+
/// type \p Ty.
983+
static RTLIB::Libcall fremToLibcall(Type *Ty) {
984+
assert(Ty->isFloatingPointTy());
985+
if (Ty->isFloatTy() || Ty->is16bitFPTy())
986+
return RTLIB::REM_F32;
987+
if (Ty->isDoubleTy())
988+
return RTLIB::REM_F64;
989+
if (Ty->isFP128Ty())
990+
return RTLIB::REM_F128;
991+
if (Ty->isX86_FP80Ty())
992+
return RTLIB::REM_F80;
993+
if (Ty->isPPC_FP128Ty())
994+
return RTLIB::REM_PPCF128;
995+
996+
llvm_unreachable("Unknown floating point type");
997+
}
998+
999+
/* Return true if, according to \p LibInfo, the target either directly
1000+
supports the frem instruction for the \p Ty, has a custom lowering,
1001+
or uses a libcall. */
1002+
static bool targetSupportsFrem(const TargetLowering &TLI,
1003+
const TargetLibraryInfo &LibInfo, Type *Ty) {
1004+
if (!TLI.isOperationExpand(ISD::FREM, EVT::getEVT(Ty)))
1005+
return true;
1006+
1007+
return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
1008+
}
1009+
1010+
static bool runImpl(Function &F, const TargetLowering &TLI,
1011+
const TargetLibraryInfo &LibInfo) {
9771012
SmallVector<Instruction *, 4> Replace;
9781013
SmallVector<Instruction *, 4> ReplaceVector;
9791014
bool Modified = false;
@@ -989,7 +1024,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
9891024
for (auto &I : instructions(F)) {
9901025
switch (I.getOpcode()) {
9911026
case Instruction::FRem:
992-
if (TLI.shouldExpandFRemInIR()) {
1027+
if (!targetSupportsFrem(TLI, LibInfo, I.getType())) {
9931028
Replace.push_back(&I);
9941029
Modified = true;
9951030
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
411411
setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
412412
Expand);
413413

414-
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
414+
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Expand);
415415

416416
if (Subtarget->has16BitInsts())
417417
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
@@ -1424,7 +1424,6 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
14241424
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
14251425
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
14261426
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
1427-
case ISD::FREM: return LowerFREM(Op, DAG);
14281427
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
14291428
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
14301429
case ISD::FRINT: return LowerFRINT(Op, DAG);
@@ -2393,21 +2392,6 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
23932392
return DAG.getMergeValues(Res, DL);
23942393
}
23952394

2396-
// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
2397-
SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
2398-
SDLoc SL(Op);
2399-
EVT VT = Op.getValueType();
2400-
auto Flags = Op->getFlags();
2401-
SDValue X = Op.getOperand(0);
2402-
SDValue Y = Op.getOperand(1);
2403-
2404-
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
2405-
SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
2406-
SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
2407-
// TODO: For f32 use FMAD instead if !hasFastFMA32?
2408-
return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
2409-
}
2410-
24112395
SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
24122396
SDLoc SL(Op);
24132397
SDValue Src = Op.getOperand(0);

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,6 @@ class AMDGPUTargetLowering : public TargetLowering {
387387
/// Fence operands always use MVT::i32; \p DL is not consulted.
MVT getFenceOperandTy(const DataLayout &DL) const override { return MVT::i32; }
390-
/// Always returns true, requesting that FRem be expanded in the LLVM IR.
bool shouldExpandFRemInIR() const override { return true; }
391390
};
392391

393392
namespace AMDGPUISD {

0 commit comments

Comments
 (0)