-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reland "[Transforms] LoopIdiomRecognize recognize strlen and wcslen #108985" #132572
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…lvm#108985)" (llvm#131412) Relands llvm#108985 This PR continues the effort made in https://discourse.llvm.org/t/rfc-strlen-loop-idiom-recognition-folding/55848 and https://reviews.llvm.org/D83392 and https://reviews.llvm.org/D88460 to extend `LoopIdiomRecognize` to find and replace loops of the form ```c base = str; while (*str) ++str; ``` and transforming the `strlen` loop idiom into the appropriate `strlen` and `wcslen` library call which will give a small performance boost if replaced. ```c str = base + strlen(base) len = str - base ```
We should bail out of modifying the CFG if the library functions are not emittable or are disabled.
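@llvm/pr-subscribers-llvm-transforms Author: Henry Jiang (mustartt) Changes: Reland #108985. In this patch, also add `strlen` and `wcslen` to the common idiom names list so that functions named `strlen` or `wcslen` do not get transformed. Patch is 61.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132572.diff 9 Files Affected: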
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 0c6406d861851..241a3fc109360 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -34,6 +34,12 @@ struct DisableLIRP {
/// When true, Memcpy is disabled.
static bool Memcpy;
+
+ /// When true, Strlen is disabled.
+ static bool Strlen;
+
+ /// When true, Wcslen is disabled.
+ static bool Wcslen;
};
/// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index a8fb38e726004..50f695dbe6c07 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -93,6 +93,12 @@ namespace llvm {
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the wcslen function to the builder, for the specified
+ /// pointer. Ptr is required to be some pointer type, and the return value has
+ /// 'size_t' type.
+ Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
/// Emit a call to the strdup function to the builder, for the specified
/// pointer. Ptr is required to be some pointer type, and the return value has
/// 'i8*' type.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8dc7f325f6262..4940dd8c47dde 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -20,8 +20,7 @@
//
// TODO List:
//
-// Future loop memory idioms to recognize:
-// memcmp, strlen, etc.
+// Future loop memory idioms to recognize: memcmp, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
@@ -33,6 +32,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,22 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
+// Backing storage and hidden command-line switch that turns off the
+// strlen loop idiom while leaving the rest of the pass enabled.
+bool DisableLIRP::Strlen;
+static cl::opt<bool, true>
+    DisableLIRPStrlen("disable-loop-idiom-strlen",
+                      cl::desc("Proceed with loop idiom recognize pass, but do "
+                               "not convert loop(s) to strlen."),
+                      cl::location(DisableLIRP::Strlen), cl::init(false),
+                      cl::ReallyHidden);
+
+// Backing storage and hidden command-line switch that turns off the
+// wcslen loop idiom. The flag is a *disable* switch (it is bound to
+// DisableLIRP::Wcslen and defaults to false), so both the option object and
+// its help text are worded accordingly, matching the strlen option above.
+bool DisableLIRP::Wcslen;
+static cl::opt<bool, true>
+    DisableLIRPWcslen("disable-loop-idiom-wcslen",
+                      cl::desc("Proceed with loop idiom recognize pass, but do "
+                               "not convert loop(s) to wcslen."),
+                      cl::location(DisableLIRP::Wcslen), cl::init(false),
+                      cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +263,7 @@ class LoopIdiomRecognize {
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
+ bool recognizeAndInsertStrLen();
/// @}
};
@@ -295,7 +313,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
- if (Name == "memset" || Name == "memcpy")
+ if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
+ Name == "wcslen")
return false;
// Determine if code size heuristics need to be applied.
@@ -1494,7 +1513,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
- recognizeShiftUntilLessThan();
+ recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1512,7 +1531,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
if (!Cond)
return nullptr;
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
return nullptr;
@@ -1529,6 +1548,275 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
+namespace {
+
+/// Decides whether a loop is a strlen/wcslen idiom and, if so, records the
+/// facts the transformation needs.
+///
+/// The result members (OpWidth, StepSizeCI, LoadBaseEv, OperandType) are only
+/// meaningful after isValidStrlenIdiom() has returned true.
+class StrlenVerifier {
+public:
+  explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
+                          const TargetLibraryInfo *TLI)
+      : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
+
+  /// Returns true if CurLoop is a single-block, single-exit loop whose exit
+  /// is controlled by comparing a loaded character against zero, where the
+  /// load address advances by exactly one character per iteration and every
+  /// phi in the exit block is an affine recurrence of CurLoop with a constant
+  /// step.
+  bool isValidStrlenIdiom() {
+    // Give up if the loop has multiple blocks, multiple backedges, or
+    // multiple exit blocks.
+    if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
+        !CurLoop->getUniqueExitBlock())
+      return false;
+
+    // It should have a preheader and a branch instruction.
+    BasicBlock *Preheader = CurLoop->getLoopPreheader();
+    if (!Preheader)
+      return false;
+
+    if (!isa<BranchInst>(Preheader->getTerminator()))
+      return false;
+
+    // The loop exit must be conditioned on an icmp with 0 the null terminator.
+    // The icmp operand has to be a load on some SSA reg that increments
+    // by 1 in the loop.
+    BasicBlock *LoopBody = *CurLoop->block_begin();
+
+    // Skip if the body is too big as it most likely is not a strlen idiom.
+    if (!LoopBody || LoopBody->size() >= 15)
+      return false;
+
+    // A terminator that is not a branch cannot match; reject it here rather
+    // than handing a null pointer to matchCondition.
+    auto *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
+    if (!LoopTerm)
+      return false;
+
+    Value *LoopCond = matchCondition(LoopTerm, LoopBody);
+    if (!LoopCond)
+      return false;
+
+    auto *LoopLoad = dyn_cast<LoadInst>(LoopCond);
+    if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
+      return false;
+
+    OperandType = LoopLoad->getType();
+    if (!OperandType->isIntegerTy())
+      return false;
+
+    // See if the pointer expression is an AddRec with constant step a of form
+    // ({n,+,a}) where a is the width of the char type.
+    Value *IncPtr = LoopLoad->getPointerOperand();
+    const auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
+    if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+      return false;
+    LoadBaseEv = LoadEv->getStart();
+
+    LLVM_DEBUG(dbgs() << "pointer load scev: " << *LoadEv << "\n");
+
+    const auto *Step = dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
+    if (!Step)
+      return false;
+    StepSizeCI = Step->getValue();
+
+    // Only 8-, 16- and 32-bit characters are candidates.
+    OpWidth = OperandType->getIntegerBitWidth();
+    if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
+      return false;
+
+    // The pointer must advance by exactly one character per iteration.
+    // Compare at full precision: narrowing the step to 'unsigned' and
+    // multiplying by 8 could wrap and let an oversized step pass the check.
+    if (!StepSizeCI->equalsInt(OpWidth / 8))
+      return false;
+
+    // Wide characters must match the wchar_t width reported by
+    // TargetLibraryInfo for this module.
+    if (OpWidth >= 16 &&
+        OpWidth != TLI->getWCharSize(*LoopLoad->getModule()) * 8)
+      return false;
+
+    // Scan every instruction in the loop to ensure there are no side effects.
+    for (Instruction &I : *LoopBody)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    BasicBlock *LoopExitBB = CurLoop->getExitBlock();
+    if (!LoopExitBB)
+      return false;
+
+    for (PHINode &PN : LoopExitBB->phis()) {
+      if (!SE->isSCEVable(PN.getType()))
+        return false;
+
+      const SCEV *Ev = SE->getSCEV(&PN);
+      if (!Ev)
+        return false;
+
+      LLVM_DEBUG(dbgs() << "loop exit phi scev: " << *Ev << "\n");
+
+      // Since we verified that the loop trip count will be a valid strlen
+      // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
+      // SCEVExpander materialize the loop output. That rewrite is only
+      // meaningful for a recurrence of this loop, so recurrences that belong
+      // to any other loop are rejected, mirroring the check on the load.
+      const auto *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
+      if (!AddRecEv || AddRecEv->getLoop() != CurLoop || !AddRecEv->isAffine())
+        return false;
+
+      // We only want RecAddExpr with recurrence step that is constant. This
+      // is good enough for all the idioms we want to recognize. Later we expand
+      // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
+      if (!isa<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
+        return false;
+    }
+
+    return true;
+  }
+
+  // Inputs supplied by the caller.
+  const Loop *CurLoop;
+  ScalarEvolution *SE;
+  const TargetLibraryInfo *TLI;
+
+  // Results, filled in by isValidStrlenIdiom().
+  unsigned OpWidth = 0;               // Bit width of the loaded character.
+  ConstantInt *StepSizeCI = nullptr;  // Per-iteration step of the load address.
+  const SCEV *LoadBaseEv = nullptr;   // Start value of the load address.
+  Type *OperandType = nullptr;        // Type of the loaded character.
+};
+
+} // namespace
+
+/// The Strlen Idiom we are trying to detect has the following structure
+///
+/// preheader:
+///   ...
+///   br label %body, ...
+///
+/// body:
+///   ... ; %0 is incremented by a gep
+///   %1 = load i8, ptr %0, align 1
+///   %2 = icmp eq i8 %1, 0
+///   br i1 %2, label %exit, label %body
+///
+/// exit:
+///   %lcssa = phi [%0, %body], ...
+///
+/// We expect the strlen idiom to have a load of a character type that
+/// is compared against '\0', and such load pointer operand must have scev
+/// expression of the form {%str,+,c} where c is a ConstantInt of the
+/// appropriate character width for the idiom, and %str is the base of the
+/// string. In addition, all lcssa phis must have the form {...,+,n} where n
+/// is a constant.
+///
+/// When transforming the output of the strlen idiom, the lcssa phis are
+/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
+/// and all subsequent uses are replaced. For example,
+///
+/// \code{.c}
+///     const char* base = str;
+///     while (*str != '\0')
+///         ++str;
+///     size_t result = str - base;
+/// \endcode
+///
+/// will be transformed as follows: The idiom will be replaced by a strlen
+/// computation to compute the address of the null terminator of the string.
+///
+/// \code{.c}
+///     const char* base = str;
+///     const char* end = base + strlen(str);
+///     size_t result = end - base;
+/// \endcode
+///
+/// In the case we index by an induction variable, as long as the induction
+/// variable has a constant int increment, we can replace all such indvars
+/// with the closed form computation of strlen
+///
+/// \code{.c}
+///     size_t i = 0;
+///     while (str[i] != '\0')
+///         ++i;
+///     size_t result = i;
+/// \endcode
+///
+/// Will be replaced by
+///
+/// \code{.c}
+///     size_t i = 0 + strlen(str);
+///     size_t result = i;
+/// \endcode
+///
+/// \returns true if the exit-block phis were rewritten in terms of a newly
+/// emitted strlen/wcslen call, false if nothing was changed.
+bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
+  if (DisableLIRP::All)
+    return false;
+
+  StrlenVerifier Verifier(CurLoop, SE, TLI);
+  if (!Verifier.isValidStrlenIdiom())
+    return false;
+
+  BasicBlock *Preheader = CurLoop->getLoopPreheader();
+  BasicBlock *LoopExitBB = CurLoop->getExitBlock();
+
+  // 8-bit characters map to strlen, anything wider to wcslen. Bail out before
+  // touching the IR if the chosen idiom is disabled or the library function
+  // cannot be emitted.
+  const bool UseStrlen = Verifier.OpWidth == 8;
+  if (UseStrlen ? DisableLIRP::Strlen : DisableLIRP::Wcslen)
+    return false;
+  if (!isLibFuncEmittable(Preheader->getModule(), TLI,
+                          UseStrlen ? LibFunc_strlen : LibFunc_wcslen))
+    return false;
+
+  // Materialize the start of the string in the preheader and call the library
+  // function on it.
+  IRBuilder<> Builder(Preheader->getTerminator());
+  SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
+                        "strlen_idiom");
+  Value *MaterializedBase = Expander.expandCodeFor(
+      Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
+      Builder.GetInsertPoint());
+
+  Value *StrLenFunc = UseStrlen
+                          ? emitStrLen(MaterializedBase, Builder, *DL, TLI)
+                          : emitWcsLen(MaterializedBase, Builder, *DL, TLI);
+  assert(StrLenFunc && "Failed to emit strlen function.");
+
+  const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
+  SmallVector<PHINode *, 4> Cleanup;
+  for (PHINode &PN : LoopExitBB->phis()) {
+    // We can now materialize the loop output as all phi have scev {base,+,a}.
+    // We expand the phi as:
+    //   %strlen = call i64 @strlen(%str)
+    //   %phi.new = base expression + step * %strlen
+    //
+    // The verifier already established that every exit phi is an add
+    // recurrence with a constant step, so cast<> (which asserts) documents
+    // that invariant instead of dereferencing an unchecked dyn_cast<> result.
+    const auto *AddRecEv = cast<SCEVAddRecExpr>(SE->getSCEV(&PN));
+    const auto *Step = cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
+    const SCEV *Base = AddRecEv->getStart();
+
+    // It is safe to truncate to base since if base is narrower than size_t
+    // the equivalent user code will have to truncate anyways.
+    const SCEV *NewEv = SE->getAddExpr(
+        Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
+                                       StrlenEv, Base->getType())));
+
+    Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
+                                                    Builder.GetInsertPoint());
+    Expander.clear();
+    PN.replaceAllUsesWith(MaterializedPHI);
+    Cleanup.push_back(&PN);
+  }
+
+  // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
+  // up by later passes
+  for (PHINode *PN : Cleanup)
+    RecursivelyDeleteDeadPHINode(PN);
+  SE->forgetLoop(CurLoop);
+
+  ++NumStrLen;
+  LLVM_DEBUG(dbgs() << "  Formed strlen idiom: " << *StrLenFunc << "\n");
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
+                              CurLoop->getStartLoc(), Preheader)
+           << "Transformed " << StrLenFunc->getName() << " loop idiom";
+  });
+
+  return true;
+}
+
/// Check if the given conditional branch is based on an unsigned less-than
/// comparison between a variable and a constant, and if the comparison is false
/// the control yields to the loop entry. If the branch matches the behaviour,
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 2301be6977cef..24eefc91117b4 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1582,6 +1582,15 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI);
}
+/// Builds a call to the wcslen library function on \p Ptr using builder \p B.
+/// \p Ptr must be non-null and of pointer type; the call's result type is the
+/// one getSizeTTy() yields for \p B and \p TLI. \p DL is not used by this
+/// function.
+Value *llvm::emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
+                        const TargetLibraryInfo *TLI) {
+  assert(Ptr && Ptr->getType()->isPointerTy() &&
+         "Argument to wcslen intrinsic must be a pointer.");
+  Type *ArgTy = B.getPtrTy();
+  Type *ResultTy = getSizeTTy(B, TLI);
+  return emitLibCall(LibFunc_wcslen, ResultTy, ArgTy, Ptr, B, TLI);
+}
+
Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *CharPtrTy = B.getPtrTy();
diff --git a/llvm/test/Transforms/LoopIdiom/strlen-noidiom.ll b/llvm/test/Transforms/LoopIdiom/strlen-noidiom.ll
new file mode 100644
index 0000000000000..498ec52ed26ee
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/strlen-noidiom.ll
@@ -0,0 +1,64 @@
+; RUN: opt -passes='loop(loop-idiom)' < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @strlen(
+; CHECK-NOT: call{{.*}} strlen
+; CHECK-LABEL: @wcslen(
+; CHECK-NOT: call{{.*}} wcslen
+
+define i64 @strlen(ptr %str) { ; function is itself named strlen; the test expects its loop not to become a call to strlen
+entry:
+ br label %while.cond
+
+while.cond: ; single-block loop: load a byte, exit when it is zero
+ %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ]
+ %0 = load i8, ptr %str.addr.0, align 1
+ %cmp.not = icmp eq i8 %0, 0
+ %incdec.ptr = getelementptr i8, ptr %str.addr.0, i64 1 ; advance the pointer by one byte
+ br i1 %cmp.not, label %while.end, label %while.cond
+
+while.end: ; result is the address of the zero byte minus %str
+ %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %str to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ ret i64 %sub.ptr.sub
+}
+
+define i64 @wcslen(ptr %src) { ; function is itself named wcslen; the test expects its loop not to become a call to wcslen
+entry:
+ %cmp = icmp eq ptr %src, null
+ br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %0 = load i32, ptr %src, align 4
+ %cmp1 = icmp eq i32 %0, 0
+ br i1 %cmp1, label %return, label %while.cond.preheader
+
+while.cond.preheader: ; preds = %lor.lhs.false
+ br label %while.cond
+
+while.cond: ; preds = %while.cond.preheader, %while.cond
+ %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %while.cond.preheader ]
+ %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 4 ; advance by 4 bytes, the size of the i32 loaded below
+ %1 = load i32, ptr %curr.0, align 4
+ %tobool.not = icmp eq i32 %1, 0 ; leave the loop on a zero element
+ br i1 %tobool.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %curr.0.lcssa = phi ptr [ %curr.0, %while.cond ]
+ %sub.ptr.lhs.cast = ptrtoint ptr %curr.0.lcssa to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %src to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 ; byte distance divided by 4 gives the element count
+ br label %return
+
+return: ; preds = %entry, %lor.lhs.false, %while.end
+ %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+ ret i64 %retval.0
+}
+
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/LoopIdiom/strlen-not-emittable.ll b/llvm/test/Transforms/LoopIdiom/strlen-not-emittable.ll
new file mode 100644
index 0000000000000..00fbe89846a30
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/strlen-not-emittable.ll
@@ -0,0 +1,66 @@
+; RUN: opt -passes='loop(loop-idiom)' < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; invalid libcall prototype
+declare void @strlen(i32)
+declare void @wcslen(i32)
+
+define i64 @valid_wcslen32(ptr %src) { ; this module declares @wcslen as void(i32); the test expects no call to @wcslen to be formed here
+; CHECK-LABEL: valid_wcslen32
+; CHECK-NOT: call {{.*}} @wcslen
+entry:
+ %cmp = icmp eq ptr %src, null
+ br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %0 = load i32, ptr %src, align 4
+ %cmp1 = icmp eq i32 %0, 0
+ br i1 %cmp1, label %return, label %while.cond.preheader
+
+while.cond.preheader: ; preds = %lor.lhs.false
+ br label %while.cond
+
+while.cond: ; preds = %while.cond.preheader, %while.cond
+ %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %while.cond.preheader ]
+ %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 4 ; advance by 4 bytes, the size of the i32 loaded below
+ %1 = load i32, ptr %curr.0, align 4
+ %tobool.not = icmp eq i32 %1, 0 ; leave the loop on a zero element
+ br i1 %tobool.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %curr.0.lcssa = phi ptr [ %curr.0, %while.cond ]
+ %sub.ptr.lhs.cast = ptrtoint ptr %curr.0.lcssa to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %src to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 ; byte distance divided by 4 gives the element count
+ br label %return
+
+return: ; preds = %entry, %lor.lhs.false, %while.end
+ %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+ ret i64 %retval.0
+}
+
+define i64 @valid_strlen(ptr %str) {
+; CHECK-LABEL: valid_strlen
+; CHECK-NOT: call {{.*}} @strlen
+entry:
+ br label %while.cond
+
+while.cond:
+ %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ]
+ %0 = load i8, ptr %str.addr.0, align 1
+ %cmp.not = icmp ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -295,7 +313,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { | |||
|
|||
// Disable loop idiom recognition if the function's name is a common idiom. | |||
StringRef Name = L->getHeader()->getParent()->getName(); | |||
if (Name == "memset" || Name == "memcpy") | |||
if (Name == "memset" || Name == "memcpy" || Name == "strlen" || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't work if the corresponding function is in a namespace. But I get that it's a rather niche case, which is better addressed by `-fno-builtin-strlen` or `-ffreestanding`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, it does not work with mangled symbols. The original goal of this was to prevent the optimizer from calling the idiom function itself. If the idiom is in a namespace, it will still be transformed to
foo::strlen(char const*):
jmp strlen@PLT
which is arguably a lot better and more correct than accidentally creating an infinite tail call recursion:
strlen(char const*):
jmp strlen
Reland #108985
In this patch, also add `strlen` and `wcslen` to the common idiom names list so that functions named `strlen` or `wcslen` do not get transformed.