Skip to content

Commit 0b779b0

Browse files
committed
Revert "[AggressiveInstCombine] Fold strcmp for short string literals"
This reverts commit 5dde755, cbfcf90 and 8981520 due to a miscompile introduced in 8981520 (see https://reviews.llvm.org/D154725#4568845 for details) Differential Revision: https://reviews.llvm.org/D157430
1 parent 98ab87f commit 0b779b0

File tree

6 files changed

+94
-427
lines changed

6 files changed

+94
-427
lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,6 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
124124
const DominatorTree *DT = nullptr,
125125
bool UseInstrInfo = true);
126126

127-
/// Return true if the given instruction is only used in zero comparison
128-
bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
129-
130-
/// Return true if the given instruction is only used in zero equality comparison
131127
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
132128

133129
/// Return true if the given value is known to be non-zero when defined. For

llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
/// \file
99
///
1010
/// AggressiveInstCombiner - Combine expression patterns to form expressions
11-
/// with fewer, simple instructions.
11+
/// with fewer, simple instructions. This pass does not modify the CFG.
1212
///
1313
//===----------------------------------------------------------------------===//
1414

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -261,13 +261,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
261261
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
262262
}
263263

264-
bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
265-
return !I->user_empty() && all_of(I->users(), [](const User *U) {
266-
ICmpInst::Predicate P;
267-
return match(U, m_ICmp(P, m_Value(), m_Zero()));
268-
});
269-
}
270-
271264
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
272265
return !I->user_empty() && all_of(I->users(), [](const User *U) {
273266
ICmpInst::Predicate P;

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 54 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "llvm/Analysis/AssumptionCache.h"
2020
#include "llvm/Analysis/BasicAliasAnalysis.h"
2121
#include "llvm/Analysis/ConstantFolding.h"
22-
#include "llvm/Analysis/DomTreeUpdater.h"
2322
#include "llvm/Analysis/GlobalsModRef.h"
2423
#include "llvm/Analysis/TargetLibraryInfo.h"
2524
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,7 +28,6 @@
2928
#include "llvm/IR/Function.h"
3029
#include "llvm/IR/IRBuilder.h"
3130
#include "llvm/IR/PatternMatch.h"
32-
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3331
#include "llvm/Transforms/Utils/BuildLibCalls.h"
3432
#include "llvm/Transforms/Utils/Local.h"
3533

@@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
398396
return true;
399397
}
400398

399+
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400+
/// pessimistic codegen that has to account for setting errno and can enable
401+
/// vectorization.
402+
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
403+
TargetLibraryInfo &TLI, AssumptionCache &AC,
404+
DominatorTree &DT) {
405+
// Match a call to sqrt mathlib function.
406+
auto *Call = dyn_cast<CallInst>(&I);
407+
if (!Call)
408+
return false;
409+
410+
Module *M = Call->getModule();
411+
LibFunc Func;
412+
if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
413+
return false;
414+
415+
if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416+
return false;
417+
418+
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419+
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
420+
// would not end up lowering to a libcall anyway (which could change the value
421+
// of errno), then:
422+
// (1) errno won't be set.
423+
// (2) it is safe to convert this to an intrinsic call.
424+
Type *Ty = Call->getType();
425+
Value *Arg = Call->getArgOperand(0);
426+
if (TTI.haveFastSqrt(Ty) &&
427+
(Call->hasNoNaNs() ||
428+
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
429+
&DT))) {
430+
IRBuilder<> Builder(&I);
431+
IRBuilderBase::FastMathFlagGuard Guard(Builder);
432+
Builder.setFastMathFlags(Call->getFastMathFlags());
433+
434+
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
435+
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
436+
I.replaceAllUsesWith(NewSqrt);
437+
438+
// Explicitly erase the old call because a call with side effects is not
439+
// trivially dead.
440+
I.eraseFromParent();
441+
return true;
442+
}
443+
444+
return false;
445+
}
446+
401447
// Check if this array of constants represents a cttz table.
402448
// Iterate over the elements from \p Table by trying to find/match all
403449
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -869,199 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
869915
return true;
870916
}
871917

872-
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873-
/// pessimistic codegen that has to account for setting errno and can enable
874-
/// vectorization.
875-
static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
876-
TargetLibraryInfo &TLI, AssumptionCache &AC,
877-
DominatorTree &DT) {
878-
Module *M = Call->getModule();
879-
880-
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881-
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
882-
// would not end up lowering to a libcall anyway (which could change the value
883-
// of errno), then:
884-
// (1) errno won't be set.
885-
// (2) it is safe to convert this to an intrinsic call.
886-
Type *Ty = Call->getType();
887-
Value *Arg = Call->getArgOperand(0);
888-
if (TTI.haveFastSqrt(Ty) &&
889-
(Call->hasNoNaNs() ||
890-
cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
891-
&DT))) {
892-
IRBuilder<> Builder(Call);
893-
IRBuilderBase::FastMathFlagGuard Guard(Builder);
894-
Builder.setFastMathFlags(Call->getFastMathFlags());
895-
896-
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
897-
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
898-
Call->replaceAllUsesWith(NewSqrt);
899-
900-
// Explicitly erase the old call because a call with side effects is not
901-
// trivially dead.
902-
Call->eraseFromParent();
903-
return true;
904-
}
905-
906-
return false;
907-
}
908-
909-
/// Try to expand strcmp(P, string_literal) where string_literal size is 1 or 2
910-
static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911-
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
912-
913-
// Trivial cases are optimized during inst combine
914-
if (Str1P == Str2P)
915-
return false;
916-
917-
StringRef Str1, Str2;
918-
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
919-
bool HasStr2 = getConstantStringInfo(Str2P, Str2);
920-
921-
Value *NonConstantP = nullptr;
922-
StringRef ConstantStr;
923-
924-
if (!HasStr1 && HasStr2) {
925-
NonConstantP = Str1P;
926-
ConstantStr = Str2;
927-
} else if (!HasStr2 && HasStr1) {
928-
NonConstantP = Str2P;
929-
ConstantStr = Str1;
930-
} else {
931-
return false;
932-
}
933-
934-
size_t ConstantStrSize = ConstantStr.size();
935-
936-
// Trivial cases are optimized during inst combine
937-
if (ConstantStrSize == 0 || ConstantStrSize > 2)
938-
return false;
939-
940-
// Check if strcmp result is only used in a comparison with zero
941-
if (!isOnlyUsedInZeroComparison(CI))
942-
return false;
943-
944-
// For strcmp(P, "x") do the following transformation:
945-
//
946-
// (before)
947-
// dst = strcmp(P, "x")
948-
//
949-
// (after)
950-
// v0 = P[0] - 'x'
951-
// [if v0 == 0]
952-
// v1 = P[1]
953-
// dst = phi(v0, v1)
954-
//
955-
// For strcmp(P, "xy") do the following transformation:
956-
//
957-
// (before)
958-
// dst = strcmp(P, "xy")
959-
//
960-
// (after)
961-
// v0 = P[0] - 'x'
962-
// [if v0 == 0]
963-
// v1 = P[1] - 'y'
964-
// [if v1 == 0]
965-
// v2 = P[2]
966-
// dst = phi(v0, v1, v2)
967-
//
968-
969-
IRBuilder<> B(CI->getParent());
970-
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
971-
972-
Type *RetType = CI->getType();
973-
974-
BasicBlock *InitialBB = CI->getParent();
975-
BasicBlock *JoinBlock = SplitBlock(InitialBB, CI, &DTU);
976-
JoinBlock->setName("strcmp_expand_sub_join");
977-
978-
B.SetInsertPoint(CI);
979-
PHINode *ResultPHI = B.CreatePHI(RetType, ConstantStrSize + 1);
980-
981-
B.SetInsertPoint(InitialBB);
982-
InitialBB->getTerminator()->eraseFromParent();
983-
984-
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
985-
986-
size_t CharacterIndexToCheck = 0;
987-
for (; CharacterIndexToCheck < ConstantStrSize; ++CharacterIndexToCheck) {
988-
Value *StrCharacterValue = B.CreateZExt(
989-
B.CreateLoad(B.getInt8Ty(),
990-
B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), NonConstantP,
991-
CharacterIndexToCheck)),
992-
RetType);
993-
Value *ConstantStrCharacterValue = ConstantInt::get(
994-
RetType,
995-
static_cast<unsigned char>(ConstantStr[CharacterIndexToCheck]));
996-
Value *CharacterSub =
997-
B.CreateNSWSub(StrCharacterValue, ConstantStrCharacterValue);
998-
Value *CharacterSubIsZero =
999-
B.CreateICmpEQ(CharacterSub, ConstantInt::get(RetType, 0));
1000-
BasicBlock *CharacterSubIsZeroBB =
1001-
BasicBlock::Create(B.getContext(), "strcmp_expand_sub_is_zero",
1002-
InitialBB->getParent(), JoinBlock);
1003-
B.CreateCondBr(CharacterSubIsZero, CharacterSubIsZeroBB, JoinBlock);
1004-
1005-
ResultPHI->addIncoming(CharacterSub, B.GetInsertBlock());
1006-
DTUpdates.emplace_back(DominatorTree::Insert, B.GetInsertBlock(),
1007-
CharacterSubIsZeroBB);
1008-
1009-
B.SetInsertPoint(CharacterSubIsZeroBB);
1010-
DTUpdates.emplace_back(DominatorTree::Insert, CharacterSubIsZeroBB,
1011-
JoinBlock);
1012-
}
1013-
1014-
Value *StrLastCharacterValue = B.CreateZExt(
1015-
B.CreateLoad(B.getInt8Ty(),
1016-
B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), NonConstantP,
1017-
CharacterIndexToCheck)),
1018-
RetType);
1019-
ResultPHI->addIncoming(StrLastCharacterValue, B.GetInsertBlock());
1020-
B.CreateBr(JoinBlock);
1021-
1022-
DTU.applyUpdates(DTUpdates);
1023-
1024-
CI->replaceAllUsesWith(ResultPHI);
1025-
CI->eraseFromParent();
1026-
1027-
MadeCFGChange = true;
1028-
1029-
return true;
1030-
}
1031-
1032-
static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
1033-
TargetLibraryInfo &TLI, DominatorTree &DT,
1034-
AssumptionCache &AC, bool &MadeCFGChange) {
1035-
CallInst *CI = dyn_cast<CallInst>(&I);
1036-
if (!CI)
1037-
return false;
1038-
1039-
LibFunc Func;
1040-
Module *M = I.getModule();
1041-
if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
1042-
return false;
1043-
1044-
switch (Func) {
1045-
case LibFunc_sqrt:
1046-
case LibFunc_sqrtf:
1047-
case LibFunc_sqrtl:
1048-
return foldSqrt(CI, TTI, TLI, AC, DT);
1049-
case LibFunc_strcmp:
1050-
return expandStrcmp(CI, DT, MadeCFGChange);
1051-
default:
1052-
break;
1053-
}
1054-
1055-
return false;
1056-
}
1057-
1058918
/// This is the entry point for folds that could be implemented in regular
1059919
/// InstCombine, but they are separated because they are not expected to
1060920
/// occur frequently and/or have more than a constant-length pattern match.
1061921
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1062922
TargetTransformInfo &TTI,
1063923
TargetLibraryInfo &TLI, AliasAnalysis &AA,
1064-
AssumptionCache &AC, bool &MadeCFGChange) {
924+
AssumptionCache &AC) {
1065925
bool MadeChange = false;
1066926
for (BasicBlock &BB : F) {
1067927
// Ignore unreachable basic blocks.
@@ -1086,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1086946
// NOTE: This function introduces erasing of the instruction `I`, so it
1087947
// needs to be called at the end of this sequence, otherwise we may make
1088948
// bugs.
1089-
MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
949+
MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
1090950
}
1091951
}
1092952

@@ -1102,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1102962
/// handled in the callers of this function.
1103963
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1104964
TargetLibraryInfo &TLI, DominatorTree &DT,
1105-
AliasAnalysis &AA, bool &ChangedCFG) {
965+
AliasAnalysis &AA) {
1106966
bool MadeChange = false;
1107967
const DataLayout &DL = F.getParent()->getDataLayout();
1108968
TruncInstCombine TIC(AC, TLI, DL, DT);
1109969
MadeChange |= TIC.run(F);
1110-
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
970+
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
1111971
return MadeChange;
1112972
}
1113973

@@ -1118,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1118978
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1119979
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1120980
auto &AA = AM.getResult<AAManager>(F);
1121-
1122-
bool MadeCFGChange = false;
1123-
1124-
if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
981+
if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
1125982
// No changes, all analyses are preserved.
1126983
return PreservedAnalyses::all();
1127984
}
1128-
1129985
// Mark all the analyses that instcombine updates as preserved.
1130986
PreservedAnalyses PA;
1131-
1132-
if (MadeCFGChange)
1133-
PA.preserve<DominatorTreeAnalysis>();
1134-
else
1135-
PA.preserveSet<CFGAnalyses>();
1136-
987+
PA.preserveSet<CFGAnalyses>();
1137988
return PA;
1138989
}

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,21 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
227227
return ConstantInt::get(RetTy, Result);
228228
}
229229

230+
static bool isOnlyUsedInComparisonWithZero(Value *V) {
231+
for (User *U : V->users()) {
232+
if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
233+
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
234+
if (C->isNullValue())
235+
continue;
236+
// Unknown instruction.
237+
return false;
238+
}
239+
return true;
240+
}
241+
230242
static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
231243
const DataLayout &DL) {
232-
if (!isOnlyUsedInZeroComparison(CI))
244+
if (!isOnlyUsedInComparisonWithZero(CI))
233245
return false;
234246

235247
if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))

0 commit comments

Comments
 (0)