@@ -75,7 +75,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
75
75
m_Shl (m_Value (ShVal0), m_Value (ShAmt)),
76
76
m_LShr (m_Value (ShVal1),
77
77
m_Sub (m_SpecificInt (Width), m_Deferred (ShAmt))))))) {
78
- return Intrinsic::fshl;
78
+ return Intrinsic::fshl;
79
79
}
80
80
81
81
// fshr(ShVal0, ShVal1, ShAmt)
@@ -84,7 +84,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
84
84
m_OneUse (m_c_Or (m_Shl (m_Value (ShVal0), m_Sub (m_SpecificInt (Width),
85
85
m_Value (ShAmt))),
86
86
m_LShr (m_Value (ShVal1), m_Deferred (ShAmt)))))) {
87
- return Intrinsic::fshr;
87
+ return Intrinsic::fshr;
88
88
}
89
89
90
90
return Intrinsic::not_intrinsic;
@@ -401,21 +401,11 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
401
401
// / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
402
402
// / pessimistic codegen that has to account for setting errno and can enable
403
403
// / vectorization.
404
- static bool foldSqrt (Instruction &I , TargetTransformInfo &TTI,
404
+ static bool foldSqrt (CallInst *Call, LibFunc Func , TargetTransformInfo &TTI,
405
405
TargetLibraryInfo &TLI, AssumptionCache &AC,
406
406
DominatorTree &DT) {
407
- // Match a call to sqrt mathlib function.
408
- auto *Call = dyn_cast<CallInst>(&I);
409
- if (!Call)
410
- return false ;
411
407
412
408
Module *M = Call->getModule ();
413
- LibFunc Func;
414
- if (!TLI.getLibFunc (*Call, Func) || !isLibFuncEmittable (M, &TLI, Func))
415
- return false ;
416
-
417
- if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
418
- return false ;
419
409
420
410
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
421
411
// (because NNAN or the operand arg must not be less than -0.0) and (3) we
@@ -428,18 +418,18 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
428
418
if (TTI.haveFastSqrt (Ty) &&
429
419
(Call->hasNoNaNs () ||
430
420
cannotBeOrderedLessThanZero (
431
- Arg, 0 , SimplifyQuery (M->getDataLayout (), &TLI, &DT, &AC, &I )))) {
432
- IRBuilder<> Builder (&I );
421
+ Arg, 0 , SimplifyQuery (M->getDataLayout (), &TLI, &DT, &AC, Call )))) {
422
+ IRBuilder<> Builder (Call );
433
423
IRBuilderBase::FastMathFlagGuard Guard (Builder);
434
424
Builder.setFastMathFlags (Call->getFastMathFlags ());
435
425
436
426
Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
437
427
Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
438
- I. replaceAllUsesWith (NewSqrt);
428
+ Call-> replaceAllUsesWith (NewSqrt);
439
429
440
430
// Explicitly erase the old call because a call with side effects is not
441
431
// trivially dead.
442
- I. eraseFromParent ();
432
+ Call-> eraseFromParent ();
443
433
return true ;
444
434
}
445
435
@@ -932,18 +922,17 @@ static cl::opt<unsigned> StrNCmpInlineThreshold(
932
922
namespace {
933
923
class StrNCmpInliner {
934
924
public:
935
- StrNCmpInliner (CallInst *CI, LibFunc Func, Function::iterator &BBNext ,
936
- DomTreeUpdater *DTU, const DataLayout &DL)
937
- : CI(CI), Func(Func), BBNext(BBNext), DTU(DTU), DL(DL) {}
925
+ StrNCmpInliner (CallInst *CI, LibFunc Func, DomTreeUpdater *DTU ,
926
+ const DataLayout &DL)
927
+ : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
938
928
939
929
bool optimizeStrNCmp ();
940
930
941
931
private:
942
- bool inlineCompare (Value *LHS, StringRef RHS, uint64_t N, bool Switched );
932
+ bool inlineCompare (Value *LHS, StringRef RHS, uint64_t N, bool Swapped );
943
933
944
934
CallInst *CI;
945
935
LibFunc Func;
946
- Function::iterator &BBNext;
947
936
DomTreeUpdater *DTU;
948
937
const DataLayout &DL;
949
938
};
@@ -952,7 +941,7 @@ class StrNCmpInliner {
952
941
953
942
// / First we normalize calls to strncmp/strcmp to the form of
954
943
// / compare(s1, s2, N), which means comparing first N bytes of s1 and s2
955
- // / (without considering '\0')
944
+ // / (without considering '\0').
956
945
// /
957
946
// / Examples:
958
947
// /
@@ -969,49 +958,53 @@ class StrNCmpInliner {
969
958
// / strncmp(s, s2, 3) -> compare(s, s2, 3)
970
959
// / \endcode
971
960
// /
972
- // / We only handle cases that N and exactly one of s1 and s2 are constant. Cases
973
- // / that s1 and s2 are both constant are already handled by the instcombine
974
- // / pass.
961
+ // / We only handle cases where N and exactly one of s1 and s2 are constant.
962
+ // / Cases where s1 and s2 are both constant are already handled by the
963
+ // / instcombine pass.
975
964
// /
976
- // / We do not handle cases that N > StrNCmpInlineThreshold.
965
+ // / We do not handle cases where N > StrNCmpInlineThreshold.
977
966
// /
978
- // / We also do not handles cases that N < 2, which are already
967
+ // / We also do not handle cases where N < 2, which are already
979
968
// / handled by the instcombine pass.
980
969
// /
981
970
bool StrNCmpInliner::optimizeStrNCmp () {
982
971
if (StrNCmpInlineThreshold < 2 )
983
972
return false ;
984
973
974
+ if (!isOnlyUsedInZeroComparison (CI))
975
+ return false ;
976
+
985
977
Value *Str1P = CI->getArgOperand (0 );
986
978
Value *Str2P = CI->getArgOperand (1 );
987
- // should be handled elsewhere
979
+ // Should be handled elsewhere.
988
980
if (Str1P == Str2P)
989
981
return false ;
990
982
991
983
StringRef Str1, Str2;
992
984
bool HasStr1 = getConstantStringInfo (Str1P, Str1, false );
993
985
bool HasStr2 = getConstantStringInfo (Str2P, Str2, false );
994
- if (!( HasStr1 ^ HasStr2) )
986
+ if (HasStr1 == HasStr2)
995
987
return false ;
996
988
997
- // note that '\0' and characters after it are not trimmed
989
+ // Note that '\0' and characters after it are not trimmed.
998
990
StringRef Str = HasStr1 ? Str1 : Str2;
999
991
1000
992
size_t Idx = Str.find (' \0 ' );
1001
993
uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1 ;
1002
994
if (Func == LibFunc_strncmp) {
1003
- if (!isa<ConstantInt>(CI->getArgOperand (2 )))
995
+ if (auto ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand (2 )))
996
+ N = std::min (N, ConstInt->getZExtValue ());
997
+ else
1004
998
return false ;
1005
- N = std::min (N, cast<ConstantInt>(CI->getArgOperand (2 ))->getZExtValue ());
1006
999
}
1007
- // now N means how many bytes we need to compare at most
1000
+ // Now N means how many bytes we need to compare at most.
1008
1001
if (N > Str.size () || N < 2 || N > StrNCmpInlineThreshold)
1009
1002
return false ;
1010
1003
1011
1004
Value *StrP = HasStr1 ? Str2P : Str1P;
1012
1005
1013
- // cases that StrP has two or more dereferenceable bytes might be better
1014
- // optimized elsewhere
1006
+ // Cases where StrP has two or more dereferenceable bytes might be better
1007
+ // optimized elsewhere.
1015
1008
bool CanBeNull = false , CanBeFreed = false ;
1016
1009
if (StrP->getPointerDereferenceableBytes (DL, CanBeNull, CanBeFreed) > 1 )
1017
1010
return false ;
@@ -1054,7 +1047,7 @@ bool StrNCmpInliner::optimizeStrNCmp() {
1054
1047
// / BBSubs[N-1] (sub) ---------+
1055
1048
// /
1056
1049
bool StrNCmpInliner::inlineCompare (Value *LHS, StringRef RHS, uint64_t N,
1057
- bool Switched ) {
1050
+ bool Swapped ) {
1058
1051
auto &Ctx = CI->getContext ();
1059
1052
IRBuilder<> B (Ctx);
1060
1053
@@ -1076,12 +1069,12 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1076
1069
Value *Base = LHS;
1077
1070
for (uint64_t i = 0 ; i < N; ++i) {
1078
1071
B.SetInsertPoint (BBSubs[i]);
1079
- Value *VL = B. CreateZExt (
1080
- B.CreateLoad (B.getInt8Ty (),
1081
- B. CreateInBoundsGEP (B. getInt8Ty (), Base, B.getInt64 (i))),
1082
- CI->getType ());
1072
+ Value *VL =
1073
+ B.CreateZExt (B. CreateLoad (B.getInt8Ty (),
1074
+ B. CreateInBoundsPtrAdd ( Base, B.getInt64 (i))),
1075
+ CI->getType ());
1083
1076
Value *VR = ConstantInt::get (CI->getType (), RHS[i]);
1084
- Value *Sub = Switched ? B.CreateSub (VR, VL) : B.CreateSub (VL, VR);
1077
+ Value *Sub = Swapped ? B.CreateSub (VR, VL) : B.CreateSub (VL, VR);
1085
1078
if (i < N - 1 )
1086
1079
B.CreateCondBr (B.CreateICmpNE (Sub, ConstantInt::get (CI->getType (), 0 )),
1087
1080
BBNE, BBSubs[i + 1 ]);
@@ -1094,67 +1087,56 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1094
1087
CI->replaceAllUsesWith (Phi);
1095
1088
CI->eraseFromParent ();
1096
1089
1097
- BBNext = BBCI->getIterator ();
1098
-
1099
- // Update DomTree
1100
1090
if (DTU) {
1101
1091
SmallVector<DominatorTree::UpdateType, 8 > Updates;
1102
- Updates.push_back ({DominatorTree::Delete, BBBefore, BBCI});
1103
1092
Updates.push_back ({DominatorTree::Insert, BBBefore, BBSubs[0 ]});
1104
1093
for (uint64_t i = 0 ; i < N; ++i) {
1105
1094
if (i < N - 1 )
1106
1095
Updates.push_back ({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1 ]});
1107
1096
Updates.push_back ({DominatorTree::Insert, BBSubs[i], BBNE});
1108
1097
}
1109
1098
Updates.push_back ({DominatorTree::Insert, BBNE, BBCI});
1099
+ Updates.push_back ({DominatorTree::Delete, BBBefore, BBCI});
1110
1100
DTU->applyUpdates (Updates);
1111
1101
}
1112
1102
return true ;
1113
1103
}
1114
1104
1115
- static bool inlineLibCalls (Function &F, TargetLibraryInfo &TLI,
1116
- const TargetTransformInfo &TTI, DominatorTree &DT,
1117
- const DataLayout &DL, bool &MadeCFGChange) {
1118
- MadeCFGChange = false ;
1119
- DomTreeUpdater DTU (&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1120
-
1121
- bool MadeChange = false ;
1122
-
1123
- Function::iterator CurrBB;
1124
- for (Function::iterator BB = F.begin (), BE = F.end (); BB != BE;) {
1125
- CurrBB = BB++;
1105
+ static bool foldLibCalls (Instruction &I, TargetTransformInfo &TTI,
1106
+ TargetLibraryInfo &TLI, llvm::AssumptionCache &AC,
1107
+ DominatorTree &DT, const DataLayout &DL,
1108
+ bool &MadeCFGChange) {
1126
1109
1127
- for (BasicBlock::iterator II = CurrBB->begin (), IE = CurrBB->end ();
1128
- II != IE; ++II) {
1129
- CallInst *Call = dyn_cast<CallInst>(&*II);
1130
- Function *CalledFunc;
1110
+ auto *CI = dyn_cast<CallInst>(&I);
1111
+ if (!CI || CI->isNoBuiltin ())
1112
+ return false ;
1131
1113
1132
- if (!Call || !(CalledFunc = Call->getCalledFunction ()))
1133
- continue ;
1114
+ Function *CalledFunc = CI->getCalledFunction ();
1115
+ if (!CalledFunc)
1116
+ return false ;
1134
1117
1135
- LibFunc LF;
1136
- if (!TLI.getLibFunc (*CalledFunc, LF))
1137
- continue ;
1118
+ LibFunc LF;
1119
+ if (!TLI.getLibFunc (*CalledFunc, LF) ||
1120
+ !isLibFuncEmittable (CI->getModule (), &TLI, LF))
1121
+ return false ;
1138
1122
1139
- switch (LF) {
1140
- case LibFunc_strcmp:
1141
- case LibFunc_strncmp: {
1142
- if (StrNCmpInliner (Call, LF, BB, &DTU, DL).optimizeStrNCmp ()) {
1143
- MadeCFGChange = true ;
1144
- break ;
1145
- }
1146
- continue ;
1147
- }
1148
- default :
1149
- continue ;
1150
- }
1123
+ DomTreeUpdater DTU (&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1151
1124
1152
- MadeChange = true ;
1153
- break ;
1125
+ switch (LF) {
1126
+ case LibFunc_sqrt:
1127
+ case LibFunc_sqrtf:
1128
+ case LibFunc_sqrtl:
1129
+ return foldSqrt (CI, LF, TTI, TLI, AC, DT);
1130
+ case LibFunc_strcmp:
1131
+ case LibFunc_strncmp:
1132
+ if (StrNCmpInliner (CI, LF, &DTU, DL).optimizeStrNCmp ()) {
1133
+ MadeCFGChange = true ;
1134
+ return true ;
1154
1135
}
1136
+ break ;
1137
+ default :;
1155
1138
}
1156
-
1157
- return MadeChange;
1139
+ return false ;
1158
1140
}
1159
1141
1160
1142
// / This is the entry point for folds that could be implemented in regular
@@ -1163,7 +1145,7 @@ static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
1163
1145
static bool foldUnusualPatterns (Function &F, DominatorTree &DT,
1164
1146
TargetTransformInfo &TTI,
1165
1147
TargetLibraryInfo &TLI, AliasAnalysis &AA,
1166
- AssumptionCache &AC) {
1148
+ AssumptionCache &AC, bool &MadeCFGChange ) {
1167
1149
bool MadeChange = false ;
1168
1150
for (BasicBlock &BB : F) {
1169
1151
// Ignore unreachable basic blocks.
@@ -1188,7 +1170,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1188
1170
// NOTE: This function introduces erasing of the instruction `I`, so it
1189
1171
// needs to be called at the end of this sequence, otherwise we may introduce
1190
1172
// bugs.
1191
- MadeChange |= foldSqrt (I, TTI, TLI, AC, DT);
1173
+ MadeChange |= foldLibCalls (I, TTI, TLI, AC, DT, DL, MadeCFGChange );
1192
1174
}
1193
1175
}
1194
1176
@@ -1209,8 +1191,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1209
1191
const DataLayout &DL = F.getParent ()->getDataLayout ();
1210
1192
TruncInstCombine TIC (AC, TLI, DL, DT);
1211
1193
MadeChange |= TIC.run (F);
1212
- MadeChange |= inlineLibCalls (F, TLI, TTI, DT, DL, MadeCFGChange);
1213
- MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC);
1194
+ MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1214
1195
return MadeChange;
1215
1196
}
1216
1197
0 commit comments