17
17
#include " llvm/CodeGen/ExpandFp.h"
18
18
#include " llvm/ADT/SmallVector.h"
19
19
#include " llvm/Analysis/GlobalsModRef.h"
20
+ #include " llvm/Analysis/SimplifyQuery.h"
21
+ #include " llvm/Analysis/ValueTracking.h"
22
+ #include " llvm/CodeGen/ISDOpcodes.h"
20
23
#include " llvm/CodeGen/Passes.h"
21
24
#include " llvm/CodeGen/TargetLowering.h"
22
25
#include " llvm/CodeGen/TargetPassConfig.h"
23
26
#include " llvm/CodeGen/TargetSubtargetInfo.h"
24
27
#include " llvm/IR/IRBuilder.h"
25
28
#include " llvm/IR/InstIterator.h"
26
29
#include " llvm/IR/PassManager.h"
30
+ #include " llvm/IR/Module.h"
31
+ #include " llvm/IR/RuntimeLibcalls.h"
27
32
#include " llvm/InitializePasses.h"
28
33
#include " llvm/Pass.h"
29
34
#include " llvm/Support/CommandLine.h"
35
+ #include " llvm/Support/ErrorHandling.h"
30
36
#include " llvm/Target/TargetMachine.h"
31
37
#include " llvm/Transforms/Utils/BasicBlockUtils.h"
32
38
@@ -89,7 +95,7 @@ class FRemExpander {
89
95
// / must match the type for which the class instance has been
90
96
// / created. The code will be generated at the insertion point of \p
91
97
// / B and the insertion point will be reset at exit.
92
- Value *buildFRem (Value *X, Value *Y) const ;
98
+ Value *buildFRem (Value *X, Value *Y, SimplifyQuery &SQ ) const ;
93
99
94
100
private:
95
101
FRemExpander (IRBuilder<> &B, Type *FremTy, short Bits, unsigned long Signbit,
@@ -98,11 +104,6 @@ class FRemExpander {
98
104
ExTy (B.getInt32Ty()), Bits(ConstantInt::get(ExTy, Bits)),
99
105
One(ConstantInt::get(ExTy, 1 )), Signbit(Signbit) {};
100
106
101
- Value *createLdexp (Value *Base, Value *Exp, const Twine &Name) const {
102
- return B.CreateIntrinsic (Intrinsic::ldexp, {ComputeFpTy, B.getInt32Ty ()},
103
- {Base, Exp}, {}, Name);
104
- }
105
-
106
107
/// Emit the floating point division 1.0 / \p V, with the constant 1.0
/// taken in ComputeFpTy, and give the resulting value the name \p Name.
Value *createRcp(Value *V, const Twine &Name) const {
  Value *Numerator = ConstantFP::get(ComputeFpTy, 1.0);
  return B.CreateFDiv(Numerator, V, Name);
}
@@ -118,8 +119,7 @@ class FRemExpander {
118
119
// ax = clt ? axp : ax;
119
120
Value *Q = B.CreateUnaryIntrinsic (Intrinsic::rint, B.CreateFMul (Ax, Ayinv),
120
121
{}, " q" );
121
- Value *AxUpdate = B.CreateIntrinsic (Intrinsic::fma, {ComputeFpTy},
122
- {B.CreateFNeg (Q), Ay, Ax}, {}, " ax" );
122
+ Value *AxUpdate = B.CreateFMA (B.CreateFNeg (Q), Ay, Ax, {}, " ax" );
123
123
Value *Clt = B.CreateFCmp (CmpInst::FCMP_OLT, AxUpdate,
124
124
ConstantFP::get (ComputeFpTy, 0.0 ), " clt" );
125
125
Value *Axp = B.CreateFAdd (AxUpdate, Ay, " axp" );
@@ -145,7 +145,7 @@ class FRemExpander {
145
145
Value *Exp = B.CreateExtractValue (Frexp, {1 });
146
146
147
147
Exp = B.CreateSub (Exp, One, ExName);
148
- Value *Pow = createLdexp (Mant, NewExp, PowName);
148
+ Value *Pow = B. CreateLdexp (Mant, NewExp, {} , PowName);
149
149
150
150
return {Pow, Exp};
151
151
}
@@ -194,7 +194,7 @@ class FRemExpander {
194
194
AxPhi->addIncoming (Ax, PreheaderBB);
195
195
196
196
Value *AxPhiUpdate = buildUpdateAx (AxPhi, Ay, Ayinv);
197
- AxPhiUpdate = createLdexp (AxPhiUpdate, Bits, " ax_update" );
197
+ AxPhiUpdate = B. CreateLdexp (AxPhiUpdate, Bits, {} , " ax_update" );
198
198
AxPhi->addIncoming (AxPhiUpdate, LoopBB);
199
199
NbIv->addIncoming (B.CreateSub (NbIv, Bits, " nb_update" ), LoopBB);
200
200
@@ -212,14 +212,14 @@ class FRemExpander {
212
212
NbExitPhi->addIncoming (NbIv, LoopBB);
213
213
NbExitPhi->addIncoming (Nb, PreheaderBB);
214
214
215
- Value *AxFinal = createLdexp (
216
- AxPhiExit, B.CreateAdd (B.CreateSub (NbExitPhi, Bits), One), " ax" );
215
+ Value *AxFinal = B. CreateLdexp (
216
+ AxPhiExit, B.CreateAdd (B.CreateSub (NbExitPhi, Bits), One), {}, " ax" );
217
217
AxFinal = buildUpdateAx (AxFinal, Ay, Ayinv);
218
218
219
219
// Build:
220
220
// ax = BUILTIN_FLDEXP_ComputeFpTy(ax, ey);
221
221
// ret = AS_FLOAT((AS_INT(x) & SIGNBIT_SP32) ^ AS_INT(ax));
222
- AxFinal = createLdexp (AxFinal, Ey, " ax" );
222
+ AxFinal = B. CreateLdexp (AxFinal, Ey, {} , " ax" );
223
223
224
224
Value *XAsInt = B.CreateBitCast (X, IntTy, " x_as_int" );
225
225
if (ComputeFpTy != X->getType ())
@@ -249,28 +249,32 @@ class FRemExpander {
249
249
RetPhi->addIncoming (Ret, B.GetInsertBlock ());
250
250
}
251
251
252
- // / Adjust the result of the main computation from the FRem expansion
253
- // / if NaNs or infinite values are possible.
254
- Value *buildNanAndInfHandling (Value *Ret, Value *X, Value *Y) const {
252
+ // / Return a value that is NaN if one of the corner cases concerning
253
+ // / the inputs \p X and \p Y is detected, and \p Ret otherwise.
254
+ Value *handleInputCornerCases (Value *Ret, Value *X,
255
+ Value *Y, SimplifyQuery &SQ) const {
255
256
// Build:
256
257
// ret = y == 0.0f ? QNAN_ComputeFpTy : ret;
257
258
// bool c = !BUILTIN_ISNAN_ComputeFpTy(y) &&
258
259
// BUILTIN_ISFINITE_ComputeFpTy(x);
259
260
// ret = c ? ret : QNAN_ComputeFpTy;
260
- // TODO Handle NaN and infinity fast math flags separately here?
261
261
Value *Nan = ConstantFP::getQNaN (FremTy);
262
-
263
- Ret = B.CreateSelect (B.createIsFPClass (Y, FPClassTest::fcZero), Nan, Ret);
264
- Value *C = B.CreateLogicalAnd (
265
- B.CreateNot (B.createIsFPClass (Y, FPClassTest::fcNan)),
266
- B.createIsFPClass (X, FPClassTest::fcFinite));
262
+ Ret = B.CreateSelect (B.CreateFCmpOEQ (Y, ConstantFP::get (FremTy, 0.0 )), Nan,
263
+ Ret);
264
+ FPClassTest NotNan = FPClassTest::fcInf | FPClassTest::fcFinite;
265
+ Value *YNotNan =
266
+ isKnownNeverNaN (Y, 0 , SQ) ? B.getTrue () : B.createIsFPClass (Y, NotNan);
267
+ Value *XFinite = isKnownNeverInfinity (X, 0 , SQ)
268
+ ? B.getTrue ()
269
+ : B.createIsFPClass (X, FPClassTest::fcFinite);
270
+ Value *C = B.CreateLogicalAnd (YNotNan, XFinite);
267
271
Ret = B.CreateSelect (C, Ret, Nan);
268
272
269
273
return Ret;
270
274
}
271
275
};
272
276
273
- Value *FRemExpander::buildFRem (Value *X, Value *Y) const {
277
+ Value *FRemExpander::buildFRem (Value *X, Value *Y, SimplifyQuery &SQ ) const {
274
278
assert (X->getType () == FremTy && Y->getType () == FremTy);
275
279
276
280
FastMathFlags FMF = B.getFastMathFlags ();
@@ -293,8 +297,10 @@ Value *FRemExpander::buildFRem(Value *X, Value *Y) const {
293
297
PHINode *RetPhi = B.CreatePHI (FremTy, 2 , " ret" );
294
298
Value *Ret = RetPhi;
295
299
296
- if (!FMF.noNaNs () || !FMF.noInfs ())
297
- Ret = buildNanAndInfHandling (Ret, X, Y);
300
+ // We would return NaN in all corner cases handled here.
301
+ // Hence, if NaNs are excluded, keep the result as it is.
302
+ if (!FMF.noNaNs ())
303
+ Ret = handleInputCornerCases (Ret, X, Y, SQ);
298
304
299
305
Function *Fun = B.GetInsertBlock ()->getParent ();
300
306
auto *ThenBB = BasicBlock::Create (B.getContext (), " frem.compute" , Fun);
@@ -352,7 +358,7 @@ static bool shouldSkipExpandFRem(BinaryOperator &I) {
352
358
isConstOrConstSelectOp (I.getOperand (1 ));
353
359
}
354
360
355
- static bool expandFRem (BinaryOperator &I) {
361
+ static bool expandFRem (BinaryOperator &I, SimplifyQuery &SQ ) {
356
362
LLVM_DEBUG (dbgs () << " Expanding instruction: " << I << ' \n ' );
357
363
if (shouldSkipExpandFRem (I)) {
358
364
LLVM_DEBUG (
@@ -384,7 +390,7 @@ static bool expandFRem(BinaryOperator &I) {
384
390
385
391
Value *Ret;
386
392
if (ReturnTy->isFloatingPointTy ())
387
- Ret = Expander->buildFRem (I.getOperand (0 ), I.getOperand (1 ));
393
+ Ret = Expander->buildFRem (I.getOperand (0 ), I.getOperand (1 ), SQ );
388
394
else {
389
395
auto *VecTy = cast<FixedVectorType>(ReturnTy);
390
396
@@ -398,7 +404,7 @@ static bool expandFRem(BinaryOperator &I) {
398
404
for (int I = 0 , E = VecTy->getNumElements (); I != E; ++I) {
399
405
Value *Num = B.CreateExtractElement (Nums, I);
400
406
Value *Denum = B.CreateExtractElement (Denums, I);
401
- Value *Rem = Expander->buildFRem (Num, Denum);
407
+ Value *Rem = Expander->buildFRem (Num, Denum, SQ );
402
408
Ret = B.CreateInsertElement (Ret, Rem, I);
403
409
}
404
410
}
@@ -963,6 +969,36 @@ static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
963
969
I->eraseFromParent ();
964
970
}
965
971
972
+ // This covers all floating point types; more than we need here.
973
+ // TODO Move somewhere else for general use?
974
+ // / Return the Libcall for a frem instruction of
975
+ // / type \p Ty.
976
+ static RTLIB::Libcall fremToLibcall (Type *Ty) {
977
+ assert (Ty->isFloatingPointTy ());
978
+ if (Ty->isFloatTy () || Ty->is16bitFPTy ())
979
+ return RTLIB::REM_F32;
980
+ if (Ty->isDoubleTy ())
981
+ return RTLIB::REM_F64;
982
+ if (Ty->isFP128Ty ())
983
+ return RTLIB::REM_F128;
984
+ if (Ty->isX86_FP80Ty ())
985
+ return RTLIB::REM_F80;
986
+ if (Ty->isPPC_FP128Ty ())
987
+ return RTLIB::REM_PPCF128;
988
+
989
+ llvm_unreachable (" Unknown floating point type" );
990
+ }
991
+
992
+ /* Return true if, according to \p LibInfo, the target either directly
993
+ supports the frem instruction for the \p Ty, has a custom lowering,
994
+ or uses a libcall. */
995
+ static bool targetSupportsFrem (const TargetLowering &TLI, Type *Ty) {
996
+ if (!TLI.isOperationExpand (ISD::FREM, EVT::getEVT (Ty)))
997
+ return true ;
998
+
999
+ return TLI.getLibcallName (fremToLibcall (Ty->getScalarType ()));
1000
+ }
1001
+
966
1002
static bool runImpl (Function &F, const TargetLowering &TLI) {
967
1003
SmallVector<Instruction *, 4 > Replace;
968
1004
SmallVector<Instruction *, 4 > ReplaceVector;
@@ -979,7 +1015,7 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
979
1015
for (auto &I : instructions (F)) {
980
1016
switch (I.getOpcode ()) {
981
1017
case Instruction::FRem:
982
- if (TLI. shouldExpandFRemInIR ( )) {
1018
+ if (! targetSupportsFrem ( TLI, I. getType () )) {
983
1019
Replace.push_back (&I);
984
1020
Modified = true ;
985
1021
}
@@ -1034,10 +1070,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) {
1034
1070
1035
1071
while (!Replace.empty ()) {
1036
1072
Instruction *I = Replace.pop_back_val ();
1037
- if (I->getOpcode () == Instruction::FRem)
1038
- expandFRem (cast<BinaryOperator>(*I));
1039
- else if (I->getOpcode () == Instruction::FPToUI ||
1040
- I->getOpcode () == Instruction::FPToSI) {
1073
+ if (I->getOpcode () == Instruction::FRem) {
1074
+ auto SQ = SimplifyQuery{I->getModule ()->getDataLayout (), I};
1075
+ expandFRem (cast<BinaryOperator>(*I), SQ);
1076
+ } else if (I->getOpcode () == Instruction::FPToUI ||
1077
+ I->getOpcode () == Instruction::FPToSI) {
1041
1078
expandFPToI (I);
1042
1079
} else {
1043
1080
expandIToFP (I);
0 commit comments