19
19
#include " llvm/Analysis/AssumptionCache.h"
20
20
#include " llvm/Analysis/BasicAliasAnalysis.h"
21
21
#include " llvm/Analysis/ConstantFolding.h"
22
+ #include " llvm/Analysis/DomTreeUpdater.h"
22
23
#include " llvm/Analysis/GlobalsModRef.h"
23
24
#include " llvm/Analysis/TargetLibraryInfo.h"
24
25
#include " llvm/Analysis/TargetTransformInfo.h"
28
29
#include " llvm/IR/Function.h"
29
30
#include " llvm/IR/IRBuilder.h"
30
31
#include " llvm/IR/PatternMatch.h"
32
+ #include " llvm/Transforms/Utils/BasicBlockUtils.h"
31
33
#include " llvm/Transforms/Utils/BuildLibCalls.h"
32
34
#include " llvm/Transforms/Utils/Local.h"
33
35
@@ -396,54 +398,6 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
396
398
return true ;
397
399
}
398
400
399
- // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400
- // / pessimistic codegen that has to account for setting errno and can enable
401
- // / vectorization.
402
- static bool foldSqrt (Instruction &I, TargetTransformInfo &TTI,
403
- TargetLibraryInfo &TLI, AssumptionCache &AC,
404
- DominatorTree &DT) {
405
- // Match a call to sqrt mathlib function.
406
- auto *Call = dyn_cast<CallInst>(&I);
407
- if (!Call)
408
- return false ;
409
-
410
- Module *M = Call->getModule ();
411
- LibFunc Func;
412
- if (!TLI.getLibFunc (*Call, Func) || !isLibFuncEmittable (M, &TLI, Func))
413
- return false ;
414
-
415
- if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416
- return false ;
417
-
418
- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419
- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
420
- // would not end up lowering to a libcall anyway (which could change the value
421
- // of errno), then:
422
- // (1) errno won't be set.
423
- // (2) it is safe to convert this to an intrinsic call.
424
- Type *Ty = Call->getType ();
425
- Value *Arg = Call->getArgOperand (0 );
426
- if (TTI.haveFastSqrt (Ty) &&
427
- (Call->hasNoNaNs () ||
428
- cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, &I,
429
- &DT))) {
430
- IRBuilder<> Builder (&I);
431
- IRBuilderBase::FastMathFlagGuard Guard (Builder);
432
- Builder.setFastMathFlags (Call->getFastMathFlags ());
433
-
434
- Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
435
- Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
436
- I.replaceAllUsesWith (NewSqrt);
437
-
438
- // Explicitly erase the old call because a call with side effects is not
439
- // trivially dead.
440
- I.eraseFromParent ();
441
- return true ;
442
- }
443
-
444
- return false ;
445
- }
446
-
447
401
// Check if this array of constants represents a cttz table.
448
402
// Iterate over the elements from \p Table by trying to find/match all
449
403
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -915,13 +869,159 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
915
869
return true ;
916
870
}
917
871
872
+ // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873
+ // / pessimistic codegen that has to account for setting errno and can enable
874
+ // / vectorization.
875
+ static bool foldSqrt (CallInst *Call, TargetTransformInfo &TTI,
876
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
877
+ DominatorTree &DT) {
878
+ Module *M = Call->getModule ();
879
+
880
+ // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881
+ // (because NNAN or the operand arg must not be less than -0.0) and (2) we
882
+ // would not end up lowering to a libcall anyway (which could change the value
883
+ // of errno), then:
884
+ // (1) errno won't be set.
885
+ // (2) it is safe to convert this to an intrinsic call.
886
+ Type *Ty = Call->getType ();
887
+ Value *Arg = Call->getArgOperand (0 );
888
+ if (TTI.haveFastSqrt (Ty) &&
889
+ (Call->hasNoNaNs () ||
890
+ cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, Call,
891
+ &DT))) {
892
+ IRBuilder<> Builder (Call);
893
+ IRBuilderBase::FastMathFlagGuard Guard (Builder);
894
+ Builder.setFastMathFlags (Call->getFastMathFlags ());
895
+
896
+ Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
897
+ Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
898
+ Call->replaceAllUsesWith (NewSqrt);
899
+
900
+ // Explicitly erase the old call because a call with side effects is not
901
+ // trivially dead.
902
+ Call->eraseFromParent ();
903
+ return true ;
904
+ }
905
+
906
+ return false ;
907
+ }
908
+
909
+ // / Try to expand strcmp(P, "x") calls.
910
+ static bool expandStrcmp (CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911
+ Value *Str1P = CI->getArgOperand (0 ), *Str2P = CI->getArgOperand (1 );
912
+
913
+ // Trivial cases are optimized during inst combine
914
+ if (Str1P == Str2P)
915
+ return false ;
916
+
917
+ StringRef Str1, Str2;
918
+ bool HasStr1 = getConstantStringInfo (Str1P, Str1);
919
+ bool HasStr2 = getConstantStringInfo (Str2P, Str2);
920
+
921
+ Value *NonConstantP = nullptr ;
922
+ StringRef ConstantStr;
923
+
924
+ if (!HasStr1 && HasStr2 && Str2.size () == 1 ) {
925
+ NonConstantP = Str1P;
926
+ ConstantStr = Str2;
927
+ } else if (!HasStr2 && HasStr1 && Str1.size () == 1 ) {
928
+ NonConstantP = Str2P;
929
+ ConstantStr = Str1;
930
+ } else {
931
+ return false ;
932
+ }
933
+
934
+ // Check if strcmp result is only used in a comparison with zero
935
+ if (!isOnlyUsedInZeroComparison (CI))
936
+ return false ;
937
+
938
+ // For strcmp(P, "x") do the following transformation:
939
+ //
940
+ // (before)
941
+ // dst = strcmp(P, "x")
942
+ //
943
+ // (after)
944
+ // v0 = P[0] - 'x'
945
+ // [if v0 == 0]
946
+ // v1 = P[1]
947
+ // dst = phi(v0, v1)
948
+ //
949
+
950
+ IRBuilder<> B (CI->getParent ());
951
+ DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Lazy);
952
+
953
+ Type *RetType = CI->getType ();
954
+
955
+ B.SetInsertPoint (CI);
956
+ BasicBlock *InitialBB = B.GetInsertBlock ();
957
+ Value *Str1FirstCharacterValue =
958
+ B.CreateZExt (B.CreateLoad (B.getInt8Ty (), NonConstantP), RetType);
959
+ Value *Str2FirstCharacterValue =
960
+ ConstantInt::get (RetType, static_cast <unsigned char >(ConstantStr[0 ]));
961
+ Value *FirstCharacterSub =
962
+ B.CreateNSWSub (Str1FirstCharacterValue, Str2FirstCharacterValue);
963
+ Value *IsFirstCharacterSubZero =
964
+ B.CreateICmpEQ (FirstCharacterSub, ConstantInt::get (RetType, 0 ));
965
+ Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen (
966
+ IsFirstCharacterSubZero, CI, /* Unreachable*/ false ,
967
+ /* BranchWeights*/ nullptr , &DTU);
968
+
969
+ B.SetInsertPoint (IsFirstCharacterSubZeroBBTerminator);
970
+ B.GetInsertBlock ()->setName (" strcmp_expand_sub_is_zero" );
971
+ BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock ();
972
+ Value *Str1SecondCharacterValue = B.CreateZExt (
973
+ B.CreateLoad (B.getInt8Ty (), B.CreateConstInBoundsGEP1_64 (
974
+ B.getInt8Ty (), NonConstantP, 1 )),
975
+ RetType);
976
+
977
+ B.SetInsertPoint (CI);
978
+ B.GetInsertBlock ()->setName (" strcmp_expand_sub_join" );
979
+
980
+ PHINode *Result = B.CreatePHI (RetType, 2 );
981
+ Result->addIncoming (FirstCharacterSub, InitialBB);
982
+ Result->addIncoming (Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
983
+
984
+ CI->replaceAllUsesWith (Result);
985
+ CI->eraseFromParent ();
986
+
987
+ MadeCFGChange = true ;
988
+
989
+ return true ;
990
+ }
991
+
992
+ static bool foldLibraryCalls (Instruction &I, TargetTransformInfo &TTI,
993
+ TargetLibraryInfo &TLI, DominatorTree &DT,
994
+ AssumptionCache &AC, bool &MadeCFGChange) {
995
+ CallInst *CI = dyn_cast<CallInst>(&I);
996
+ if (!CI)
997
+ return false ;
998
+
999
+ LibFunc Func;
1000
+ Module *M = I.getModule ();
1001
+ if (!TLI.getLibFunc (*CI, Func) || !isLibFuncEmittable (M, &TLI, Func))
1002
+ return false ;
1003
+
1004
+ switch (Func) {
1005
+ case LibFunc_sqrt:
1006
+ case LibFunc_sqrtf:
1007
+ case LibFunc_sqrtl:
1008
+ return foldSqrt (CI, TTI, TLI, AC, DT);
1009
+ case LibFunc_strcmp:
1010
+ return expandStrcmp (CI, DT, MadeCFGChange);
1011
+ default :
1012
+ break ;
1013
+ }
1014
+
1015
+ return false ;
1016
+ }
1017
+
918
1018
// / This is the entry point for folds that could be implemented in regular
919
1019
// / InstCombine, but they are separated because they are not expected to
920
1020
// / occur frequently and/or have more than a constant-length pattern match.
921
1021
static bool foldUnusualPatterns (Function &F, DominatorTree &DT,
922
1022
TargetTransformInfo &TTI,
923
1023
TargetLibraryInfo &TLI, AliasAnalysis &AA,
924
- AssumptionCache &AC) {
1024
+ AssumptionCache &AC, bool &MadeCFGChange ) {
925
1025
bool MadeChange = false ;
926
1026
for (BasicBlock &BB : F) {
927
1027
// Ignore unreachable basic blocks.
@@ -946,7 +1046,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
946
1046
// NOTE: This function introduces erasing of the instruction `I`, so it
947
1047
// needs to be called at the end of this sequence, otherwise we may make
948
1048
// bugs.
949
- MadeChange |= foldSqrt (I, TTI, TLI, AC, DT );
1049
+ MadeChange |= foldLibraryCalls (I, TTI, TLI, DT, AC, MadeCFGChange );
950
1050
}
951
1051
}
952
1052
@@ -962,12 +1062,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
962
1062
// / handled in the callers of this function.
963
1063
static bool runImpl (Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
964
1064
TargetLibraryInfo &TLI, DominatorTree &DT,
965
- AliasAnalysis &AA) {
1065
+ AliasAnalysis &AA, bool &ChangedCFG ) {
966
1066
bool MadeChange = false ;
967
1067
const DataLayout &DL = F.getParent ()->getDataLayout ();
968
1068
TruncInstCombine TIC (AC, TLI, DL, DT);
969
1069
MadeChange |= TIC.run (F);
970
- MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC);
1070
+ MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC, ChangedCFG );
971
1071
return MadeChange;
972
1072
}
973
1073
@@ -978,12 +1078,21 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
978
1078
auto &DT = AM.getResult <DominatorTreeAnalysis>(F);
979
1079
auto &TTI = AM.getResult <TargetIRAnalysis>(F);
980
1080
auto &AA = AM.getResult <AAManager>(F);
981
- if (!runImpl (F, AC, TTI, TLI, DT, AA)) {
1081
+
1082
+ bool MadeCFGChange = false ;
1083
+
1084
+ if (!runImpl (F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
982
1085
// No changes, all analyses are preserved.
983
1086
return PreservedAnalyses::all ();
984
1087
}
1088
+
985
1089
// Mark all the analyses that instcombine updates as preserved.
986
1090
PreservedAnalyses PA;
987
- PA.preserveSet <CFGAnalyses>();
1091
+
1092
+ if (MadeCFGChange)
1093
+ PA.preserve <DominatorTreeAnalysis>();
1094
+ else
1095
+ PA.preserveSet <CFGAnalyses>();
1096
+
988
1097
return PA;
989
1098
}
0 commit comments