19
19
#include " llvm/Analysis/AssumptionCache.h"
20
20
#include " llvm/Analysis/BasicAliasAnalysis.h"
21
21
#include " llvm/Analysis/ConstantFolding.h"
22
- #include " llvm/Analysis/DomTreeUpdater.h"
23
22
#include " llvm/Analysis/GlobalsModRef.h"
24
23
#include " llvm/Analysis/TargetLibraryInfo.h"
25
24
#include " llvm/Analysis/TargetTransformInfo.h"
29
28
#include " llvm/IR/Function.h"
30
29
#include " llvm/IR/IRBuilder.h"
31
30
#include " llvm/IR/PatternMatch.h"
32
- #include " llvm/Transforms/Utils/BasicBlockUtils.h"
33
31
#include " llvm/Transforms/Utils/BuildLibCalls.h"
34
32
#include " llvm/Transforms/Utils/Local.h"
35
33
@@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
398
396
return true ;
399
397
}
400
398
399
+ /// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400
+ /// pessimistic codegen that has to account for setting errno and can enable
401
+ /// vectorization. Returns true if \p I was replaced by a call to the sqrt intrinsic and erased, and false if \p I was left untouched.
402
+ static bool foldSqrt (Instruction &I, TargetTransformInfo &TTI,
403
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
404
+ DominatorTree &DT) {
405
+ // Match a call to the sqrt mathlib function (sqrt, sqrtf or sqrtl).
406
+ auto *Call = dyn_cast<CallInst>(&I);
407
+ if (!Call)
408
+ return false ;
409
+
410
+ Module *M = Call->getModule ();
411
+ LibFunc Func;
412
+ if (!TLI.getLibFunc (*Call, Func) || !isLibFuncEmittable (M, &TLI, Func))
413
+ return false ;
414
+
415
+ if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416
+ return false ;
417
+
418
+ // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419
+ // (because NNAN or the operand arg must not be less than -0.0) and (3) we
420
+ // would not end up lowering to a libcall anyway (which could change the value
421
+ // of errno), then:
422
+ // (1) errno won't be set.
423
+ // (2) it is safe to convert this to an intrinsic call.
424
+ Type *Ty = Call->getType ();
425
+ Value *Arg = Call->getArgOperand (0 );
426
+ if (TTI.haveFastSqrt (Ty) &&
427
+ (Call->hasNoNaNs () ||
428
+ cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, &I,
429
+ &DT))) {
430
+ IRBuilder<> Builder (&I);
431
+ IRBuilderBase::FastMathFlagGuard Guard (Builder);
432
+ Builder.setFastMathFlags (Call->getFastMathFlags ()); // Build the intrinsic call with the libcall's fast-math flags.
433
+
434
+ Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
435
+ Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
436
+ I.replaceAllUsesWith (NewSqrt);
437
+
438
+ // Explicitly erase the old call because a call with side effects is not
439
+ // trivially dead.
440
+ I.eraseFromParent ();
441
+ return true ;
442
+ }
443
+
444
+ return false ;
445
+ }
446
+
401
447
// Check if this array of constants represents a cttz table.
402
448
// Iterate over the elements from \p Table by trying to find/match all
403
449
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -869,199 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
869
915
return true ;
870
916
}
871
917
872
- // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873
- // / pessimistic codegen that has to account for setting errno and can enable
874
- // / vectorization.
875
- static bool foldSqrt (CallInst *Call, TargetTransformInfo &TTI,
876
- TargetLibraryInfo &TLI, AssumptionCache &AC,
877
- DominatorTree &DT) {
878
- Module *M = Call->getModule ();
879
-
880
- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881
- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
882
- // would not end up lowering to a libcall anyway (which could change the value
883
- // of errno), then:
884
- // (1) errno won't be set.
885
- // (2) it is safe to convert this to an intrinsic call.
886
- Type *Ty = Call->getType ();
887
- Value *Arg = Call->getArgOperand (0 );
888
- if (TTI.haveFastSqrt (Ty) &&
889
- (Call->hasNoNaNs () ||
890
- cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, Call,
891
- &DT))) {
892
- IRBuilder<> Builder (Call);
893
- IRBuilderBase::FastMathFlagGuard Guard (Builder);
894
- Builder.setFastMathFlags (Call->getFastMathFlags ());
895
-
896
- Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
897
- Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
898
- Call->replaceAllUsesWith (NewSqrt);
899
-
900
- // Explicitly erase the old call because a call with side effects is not
901
- // trivially dead.
902
- Call->eraseFromParent ();
903
- return true ;
904
- }
905
-
906
- return false ;
907
- }
908
-
909
- // / Try to expand strcmp(P, string_literal) where string_literal size is 1 or 2
910
- static bool expandStrcmp (CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911
- Value *Str1P = CI->getArgOperand (0 ), *Str2P = CI->getArgOperand (1 );
912
-
913
- // Trivial cases are optimized during inst combine
914
- if (Str1P == Str2P)
915
- return false ;
916
-
917
- StringRef Str1, Str2;
918
- bool HasStr1 = getConstantStringInfo (Str1P, Str1);
919
- bool HasStr2 = getConstantStringInfo (Str2P, Str2);
920
-
921
- Value *NonConstantP = nullptr ;
922
- StringRef ConstantStr;
923
-
924
- if (!HasStr1 && HasStr2) {
925
- NonConstantP = Str1P;
926
- ConstantStr = Str2;
927
- } else if (!HasStr2 && HasStr1) {
928
- NonConstantP = Str2P;
929
- ConstantStr = Str1;
930
- } else {
931
- return false ;
932
- }
933
-
934
- size_t ConstantStrSize = ConstantStr.size ();
935
-
936
- // Trivial cases are optimized during inst combine
937
- if (ConstantStrSize == 0 || ConstantStrSize > 2 )
938
- return false ;
939
-
940
- // Check if strcmp result is only used in a comparison with zero
941
- if (!isOnlyUsedInZeroComparison (CI))
942
- return false ;
943
-
944
- // For strcmp(P, "x") do the following transformation:
945
- //
946
- // (before)
947
- // dst = strcmp(P, "x")
948
- //
949
- // (after)
950
- // v0 = P[0] - 'x'
951
- // [if v0 == 0]
952
- // v1 = P[1]
953
- // dst = phi(v0, v1)
954
- //
955
- // For strcmp(P, "xy") do the following transformation:
956
- //
957
- // (before)
958
- // dst = strcmp(P, "xy")
959
- //
960
- // (after)
961
- // v0 = P[0] - 'x'
962
- // [if v0 == 0]
963
- // v1 = P[1] - 'y'
964
- // [if v1 == 0]
965
- // v2 = P[2]
966
- // dst = phi(v0, v1, v2)
967
- //
968
-
969
- IRBuilder<> B (CI->getParent ());
970
- DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Lazy);
971
-
972
- Type *RetType = CI->getType ();
973
-
974
- BasicBlock *InitialBB = CI->getParent ();
975
- BasicBlock *JoinBlock = SplitBlock (InitialBB, CI, &DTU);
976
- JoinBlock->setName (" strcmp_expand_sub_join" );
977
-
978
- B.SetInsertPoint (CI);
979
- PHINode *ResultPHI = B.CreatePHI (RetType, ConstantStrSize + 1 );
980
-
981
- B.SetInsertPoint (InitialBB);
982
- InitialBB->getTerminator ()->eraseFromParent ();
983
-
984
- SmallVector<DominatorTree::UpdateType, 4 > DTUpdates;
985
-
986
- size_t CharacterIndexToCheck = 0 ;
987
- for (; CharacterIndexToCheck < ConstantStrSize; ++CharacterIndexToCheck) {
988
- Value *StrCharacterValue = B.CreateZExt (
989
- B.CreateLoad (B.getInt8Ty (),
990
- B.CreateConstInBoundsGEP1_64 (B.getInt8Ty (), NonConstantP,
991
- CharacterIndexToCheck)),
992
- RetType);
993
- Value *ConstantStrCharacterValue = ConstantInt::get (
994
- RetType,
995
- static_cast <unsigned char >(ConstantStr[CharacterIndexToCheck]));
996
- Value *CharacterSub =
997
- B.CreateNSWSub (StrCharacterValue, ConstantStrCharacterValue);
998
- Value *CharacterSubIsZero =
999
- B.CreateICmpEQ (CharacterSub, ConstantInt::get (RetType, 0 ));
1000
- BasicBlock *CharacterSubIsZeroBB =
1001
- BasicBlock::Create (B.getContext (), " strcmp_expand_sub_is_zero" ,
1002
- InitialBB->getParent (), JoinBlock);
1003
- B.CreateCondBr (CharacterSubIsZero, CharacterSubIsZeroBB, JoinBlock);
1004
-
1005
- ResultPHI->addIncoming (CharacterSub, B.GetInsertBlock ());
1006
- DTUpdates.emplace_back (DominatorTree::Insert, B.GetInsertBlock (),
1007
- CharacterSubIsZeroBB);
1008
-
1009
- B.SetInsertPoint (CharacterSubIsZeroBB);
1010
- DTUpdates.emplace_back (DominatorTree::Insert, CharacterSubIsZeroBB,
1011
- JoinBlock);
1012
- }
1013
-
1014
- Value *StrLastCharacterValue = B.CreateZExt (
1015
- B.CreateLoad (B.getInt8Ty (),
1016
- B.CreateConstInBoundsGEP1_64 (B.getInt8Ty (), NonConstantP,
1017
- CharacterIndexToCheck)),
1018
- RetType);
1019
- ResultPHI->addIncoming (StrLastCharacterValue, B.GetInsertBlock ());
1020
- B.CreateBr (JoinBlock);
1021
-
1022
- DTU.applyUpdates (DTUpdates);
1023
-
1024
- CI->replaceAllUsesWith (ResultPHI);
1025
- CI->eraseFromParent ();
1026
-
1027
- MadeCFGChange = true ;
1028
-
1029
- return true ;
1030
- }
1031
-
1032
- static bool foldLibraryCalls (Instruction &I, TargetTransformInfo &TTI,
1033
- TargetLibraryInfo &TLI, DominatorTree &DT,
1034
- AssumptionCache &AC, bool &MadeCFGChange) {
1035
- CallInst *CI = dyn_cast<CallInst>(&I);
1036
- if (!CI)
1037
- return false ;
1038
-
1039
- LibFunc Func;
1040
- Module *M = I.getModule ();
1041
- if (!TLI.getLibFunc (*CI, Func) || !isLibFuncEmittable (M, &TLI, Func))
1042
- return false ;
1043
-
1044
- switch (Func) {
1045
- case LibFunc_sqrt:
1046
- case LibFunc_sqrtf:
1047
- case LibFunc_sqrtl:
1048
- return foldSqrt (CI, TTI, TLI, AC, DT);
1049
- case LibFunc_strcmp:
1050
- return expandStrcmp (CI, DT, MadeCFGChange);
1051
- default :
1052
- break ;
1053
- }
1054
-
1055
- return false ;
1056
- }
1057
-
1058
918
/// This is the entry point for folds that could be implemented in regular
1059
919
/// InstCombine, but they are separated because they are not expected to
1060
920
/// occur frequently and/or have more than a constant-length pattern match.
1061
921
static bool foldUnusualPatterns (Function &F, DominatorTree &DT,
1062
922
TargetTransformInfo &TTI,
1063
923
TargetLibraryInfo &TLI, AliasAnalysis &AA,
1064
- AssumptionCache &AC, bool &MadeCFGChange ) {
924
+ AssumptionCache &AC) {
1065
925
bool MadeChange = false ;
1066
926
for (BasicBlock &BB : F) {
1067
927
// Ignore unreachable basic blocks.
@@ -1086,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1086
946
// NOTE: This function may erase the instruction `I`, so it
1087
947
// needs to be called at the end of this sequence; otherwise we may
1088
948
// introduce bugs.
1089
- MadeChange |= foldLibraryCalls (I, TTI, TLI, DT, AC, MadeCFGChange );
949
+ MadeChange |= foldSqrt (I, TTI, TLI, AC, DT );
1090
950
}
1091
951
}
1092
952
@@ -1102,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1102
962
/// handled in the callers of this function.
1103
963
static bool runImpl (Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1104
964
TargetLibraryInfo &TLI, DominatorTree &DT,
1105
- AliasAnalysis &AA, bool &ChangedCFG ) {
965
+ AliasAnalysis &AA) {
1106
966
bool MadeChange = false ;
1107
967
const DataLayout &DL = F.getParent ()->getDataLayout ();
1108
968
TruncInstCombine TIC (AC, TLI, DL, DT);
1109
969
MadeChange |= TIC.run (F);
1110
- MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC, ChangedCFG );
970
+ MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC);
1111
971
return MadeChange;
1112
972
}
1113
973
@@ -1118,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1118
978
auto &DT = AM.getResult <DominatorTreeAnalysis>(F);
1119
979
auto &TTI = AM.getResult <TargetIRAnalysis>(F);
1120
980
auto &AA = AM.getResult <AAManager>(F);
1121
-
1122
- bool MadeCFGChange = false ;
1123
-
1124
- if (!runImpl (F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
981
+ if (!runImpl (F, AC, TTI, TLI, DT, AA)) {
1125
982
// No changes, all analyses are preserved.
1126
983
return PreservedAnalyses::all ();
1127
984
}
1128
-
1129
985
// Mark all the analyses that instcombine updates as preserved.
1130
986
PreservedAnalyses PA;
1131
-
1132
- if (MadeCFGChange)
1133
- PA.preserve <DominatorTreeAnalysis>();
1134
- else
1135
- PA.preserveSet <CFGAnalyses>();
1136
-
987
+ PA.preserveSet <CFGAnalyses>();
1137
988
return PA;
1138
989
}
0 commit comments