Skip to content

Commit 8981520

Browse files
kitaisrealnikic
authored andcommitted
[AggressiveInstCombine] Fold strcmp for short string literals
Fold strcmp() against 1-char string literals. This designates AggressiveInstCombine as the pass for libcalls simplifications that may need to change the control flow graph. Fixes llvm#58003. Differential Revision: https://reviews.llvm.org/D154725
1 parent e537c83 commit 8981520

File tree

6 files changed

+267
-82
lines changed

6 files changed

+267
-82
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
124124
const DominatorTree *DT = nullptr,
125125
bool UseInstrInfo = true);
126126

127+
/// Return true if the given instruction is only used in zero comparison
128+
bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
129+
130+
/// Return true if the given instruction is only used in zero equality comparison
127131
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
128132

129133
/// Return true if the given value is known to be non-zero when defined. For

llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/// \file
99
///
1010
/// AggressiveInstCombiner - Combine expression patterns to form expressions
11-
/// with fewer, simple instructions. This pass does not modify the CFG.
11+
/// with fewer, simple instructions.
1212
///
1313
//===----------------------------------------------------------------------===//
1414

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,13 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
261261
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
262262
}
263263

264+
bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
265+
return !I->user_empty() && all_of(I->users(), [](const User *U) {
266+
ICmpInst::Predicate P;
267+
return match(U, m_ICmp(P, m_Value(), m_Zero()));
268+
});
269+
}
270+
264271
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
265272
return !I->user_empty() && all_of(I->users(), [](const User *U) {
266273
ICmpInst::Predicate P;

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 163 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Analysis/AssumptionCache.h"
2020
#include "llvm/Analysis/BasicAliasAnalysis.h"
2121
#include "llvm/Analysis/ConstantFolding.h"
22+
#include "llvm/Analysis/DomTreeUpdater.h"
2223
#include "llvm/Analysis/GlobalsModRef.h"
2324
#include "llvm/Analysis/TargetLibraryInfo.h"
2425
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -28,6 +29,7 @@
2829
#include "llvm/IR/Function.h"
2930
#include "llvm/IR/IRBuilder.h"
3031
#include "llvm/IR/PatternMatch.h"
32+
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3133
#include "llvm/Transforms/Utils/BuildLibCalls.h"
3234
#include "llvm/Transforms/Utils/Local.h"
3335

@@ -396,54 +398,6 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
396398
return true;
397399
}
398400

399-
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400-
/// pessimistic codegen that has to account for setting errno and can enable
401-
/// vectorization.
402-
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
403-
TargetLibraryInfo &TLI, AssumptionCache &AC,
404-
DominatorTree &DT) {
405-
// Match a call to sqrt mathlib function.
406-
auto *Call = dyn_cast<CallInst>(&I);
407-
if (!Call)
408-
return false;
409-
410-
Module *M = Call->getModule();
411-
LibFunc Func;
412-
if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
413-
return false;
414-
415-
if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416-
return false;
417-
418-
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419-
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
420-
// would not end up lowering to a libcall anyway (which could change the value
421-
// of errno), then:
422-
// (1) errno won't be set.
423-
// (2) it is safe to convert this to an intrinsic call.
424-
Type *Ty = Call->getType();
425-
Value *Arg = Call->getArgOperand(0);
426-
if (TTI.haveFastSqrt(Ty) &&
427-
(Call->hasNoNaNs() ||
428-
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
429-
&DT))) {
430-
IRBuilder<> Builder(&I);
431-
IRBuilderBase::FastMathFlagGuard Guard(Builder);
432-
Builder.setFastMathFlags(Call->getFastMathFlags());
433-
434-
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
435-
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
436-
I.replaceAllUsesWith(NewSqrt);
437-
438-
// Explicitly erase the old call because a call with side effects is not
439-
// trivially dead.
440-
I.eraseFromParent();
441-
return true;
442-
}
443-
444-
return false;
445-
}
446-
447401
// Check if this array of constants represents a cttz table.
448402
// Iterate over the elements from \p Table by trying to find/match all
449403
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -915,13 +869,159 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
915869
return true;
916870
}
917871

872+
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873+
/// pessimistic codegen that has to account for setting errno and can enable
874+
/// vectorization.
875+
static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
876+
TargetLibraryInfo &TLI, AssumptionCache &AC,
877+
DominatorTree &DT) {
878+
Module *M = Call->getModule();
879+
880+
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881+
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
882+
// would not end up lowering to a libcall anyway (which could change the value
883+
// of errno), then:
884+
// (1) errno won't be set.
885+
// (2) it is safe to convert this to an intrinsic call.
886+
Type *Ty = Call->getType();
887+
Value *Arg = Call->getArgOperand(0);
888+
if (TTI.haveFastSqrt(Ty) &&
889+
(Call->hasNoNaNs() ||
890+
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
891+
&DT))) {
892+
IRBuilder<> Builder(Call);
893+
IRBuilderBase::FastMathFlagGuard Guard(Builder);
894+
Builder.setFastMathFlags(Call->getFastMathFlags());
895+
896+
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
897+
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
898+
Call->replaceAllUsesWith(NewSqrt);
899+
900+
// Explicitly erase the old call because a call with side effects is not
901+
// trivially dead.
902+
Call->eraseFromParent();
903+
return true;
904+
}
905+
906+
return false;
907+
}
908+
909+
/// Try to expand strcmp(P, "x") calls.
910+
static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911+
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
912+
913+
// Trivial cases are optimized during inst combine
914+
if (Str1P == Str2P)
915+
return false;
916+
917+
StringRef Str1, Str2;
918+
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
919+
bool HasStr2 = getConstantStringInfo(Str2P, Str2);
920+
921+
Value *NonConstantP = nullptr;
922+
StringRef ConstantStr;
923+
924+
if (!HasStr1 && HasStr2 && Str2.size() == 1) {
925+
NonConstantP = Str1P;
926+
ConstantStr = Str2;
927+
} else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
928+
NonConstantP = Str2P;
929+
ConstantStr = Str1;
930+
} else {
931+
return false;
932+
}
933+
934+
// Check if strcmp result is only used in a comparison with zero
935+
if (!isOnlyUsedInZeroComparison(CI))
936+
return false;
937+
938+
// For strcmp(P, "x") do the following transformation:
939+
//
940+
// (before)
941+
// dst = strcmp(P, "x")
942+
//
943+
// (after)
944+
// v0 = P[0] - 'x'
945+
// [if v0 == 0]
946+
// v1 = P[1]
947+
// dst = phi(v0, v1)
948+
//
949+
950+
IRBuilder<> B(CI->getParent());
951+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
952+
953+
Type *RetType = CI->getType();
954+
955+
B.SetInsertPoint(CI);
956+
BasicBlock *InitialBB = B.GetInsertBlock();
957+
Value *Str1FirstCharacterValue =
958+
B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
959+
Value *Str2FirstCharacterValue =
960+
ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
961+
Value *FirstCharacterSub =
962+
B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
963+
Value *IsFirstCharacterSubZero =
964+
B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
965+
Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
966+
IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
967+
/*BranchWeights*/ nullptr, &DTU);
968+
969+
B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
970+
B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
971+
BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
972+
Value *Str1SecondCharacterValue = B.CreateZExt(
973+
B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
974+
B.getInt8Ty(), NonConstantP, 1)),
975+
RetType);
976+
977+
B.SetInsertPoint(CI);
978+
B.GetInsertBlock()->setName("strcmp_expand_sub_join");
979+
980+
PHINode *Result = B.CreatePHI(RetType, 2);
981+
Result->addIncoming(FirstCharacterSub, InitialBB);
982+
Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
983+
984+
CI->replaceAllUsesWith(Result);
985+
CI->eraseFromParent();
986+
987+
MadeCFGChange = true;
988+
989+
return true;
990+
}
991+
992+
static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
993+
TargetLibraryInfo &TLI, DominatorTree &DT,
994+
AssumptionCache &AC, bool &MadeCFGChange) {
995+
CallInst *CI = dyn_cast<CallInst>(&I);
996+
if (!CI)
997+
return false;
998+
999+
LibFunc Func;
1000+
Module *M = I.getModule();
1001+
if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
1002+
return false;
1003+
1004+
switch (Func) {
1005+
case LibFunc_sqrt:
1006+
case LibFunc_sqrtf:
1007+
case LibFunc_sqrtl:
1008+
return foldSqrt(CI, TTI, TLI, AC, DT);
1009+
case LibFunc_strcmp:
1010+
return expandStrcmp(CI, DT, MadeCFGChange);
1011+
default:
1012+
break;
1013+
}
1014+
1015+
return false;
1016+
}
1017+
9181018
/// This is the entry point for folds that could be implemented in regular
9191019
/// InstCombine, but they are separated because they are not expected to
9201020
/// occur frequently and/or have more than a constant-length pattern match.
9211021
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
9221022
TargetTransformInfo &TTI,
9231023
TargetLibraryInfo &TLI, AliasAnalysis &AA,
924-
AssumptionCache &AC) {
1024+
AssumptionCache &AC, bool &MadeCFGChange) {
9251025
bool MadeChange = false;
9261026
for (BasicBlock &BB : F) {
9271027
// Ignore unreachable basic blocks.
@@ -946,7 +1046,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
9461046
// NOTE: This function introduces erasing of the instruction `I`, so it
9471047
// needs to be called at the end of this sequence, otherwise we may make
9481048
// bugs.
949-
MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
1049+
MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
9501050
}
9511051
}
9521052

@@ -962,12 +1062,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
9621062
/// handled in the callers of this function.
9631063
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
9641064
TargetLibraryInfo &TLI, DominatorTree &DT,
965-
AliasAnalysis &AA) {
1065+
AliasAnalysis &AA, bool &ChangedCFG) {
9661066
bool MadeChange = false;
9671067
const DataLayout &DL = F.getParent()->getDataLayout();
9681068
TruncInstCombine TIC(AC, TLI, DL, DT);
9691069
MadeChange |= TIC.run(F);
970-
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
1070+
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
9711071
return MadeChange;
9721072
}
9731073

@@ -978,12 +1078,21 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
9781078
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
9791079
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
9801080
auto &AA = AM.getResult<AAManager>(F);
981-
if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
1081+
1082+
bool MadeCFGChange = false;
1083+
1084+
if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
9821085
// No changes, all analyses are preserved.
9831086
return PreservedAnalyses::all();
9841087
}
1088+
9851089
// Mark all the analyses that instcombine updates as preserved.
9861090
PreservedAnalyses PA;
987-
PA.preserveSet<CFGAnalyses>();
1091+
1092+
if (MadeCFGChange)
1093+
PA.preserve<DominatorTreeAnalysis>();
1094+
else
1095+
PA.preserveSet<CFGAnalyses>();
1096+
9881097
return PA;
9891098
}

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -227,21 +227,9 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
227227
return ConstantInt::get(RetTy, Result);
228228
}
229229

230-
static bool isOnlyUsedInComparisonWithZero(Value *V) {
231-
for (User *U : V->users()) {
232-
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
233-
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
234-
if (C->isNullValue())
235-
continue;
236-
// Unknown instruction.
237-
return false;
238-
}
239-
return true;
240-
}
241-
242230
static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
243231
const DataLayout &DL) {
244-
if (!isOnlyUsedInComparisonWithZero(CI))
232+
if (!isOnlyUsedInZeroComparison(CI))
245233
return false;
246234

247235
if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))

0 commit comments

Comments
 (0)