[MemCpyOpt][NFC] Format codebase #90225

Merged 1 commit on Apr 27, 2024

119 changes: 62 additions & 57 deletions llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
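For context: an NFC reformat of this kind is produced mechanically with clang-format rather than by hand. The PR page doesn't record the exact invocation, so the commands below are an assumption based on the usual LLVM workflow (the style comes from the repository's root `.clang-format`):

```sh
# Reformat the whole file in place using the repo's .clang-format
clang-format -i llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

# Or limit formatting to the lines a pending change actually touches
git clang-format
```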
@@ -99,7 +99,7 @@ struct MemsetRange {
MaybeAlign Alignment;

/// TheStores - The actual stores that make up this range.
-SmallVector<Instruction*, 16> TheStores;
+SmallVector<Instruction *, 16> TheStores;

bool isProfitableToUseMemset(const DataLayout &DL) const;
};
@@ -108,10 +108,12 @@ struct MemsetRange {

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
-if (TheStores.size() >= 4 || End-Start >= 16) return true;
+if (TheStores.size() >= 4 || End - Start >= 16)
+return true;

// If there is nothing to merge, don't do anything.
-if (TheStores.size() < 2) return false;
+if (TheStores.size() < 2)
+return false;

// If any of the stores are a memset, then it is always good to extend the
// memset.
@@ -121,7 +123,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {

// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
-if (TheStores.size() == 2) return false;
+if (TheStores.size() == 2)
+return false;

// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -133,7 +136,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// the maximum GPR width is the same size as the largest legal integer
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
-unsigned Bytes = unsigned(End-Start);
+unsigned Bytes = unsigned(End - Start);
unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
if (MaxIntSize == 0)
MaxIntSize = 1;
@@ -145,7 +148,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// etc.
-return TheStores.size() > NumPointerStores+NumByteStores;
+return TheStores.size() > NumPointerStores + NumByteStores;
}

namespace {
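As a reading aid for `isProfitableToUseMemset` above: a self-contained sketch of the same store-count arithmetic, with hypothetical names (`profitableAsMemset`, `MaxIntBytes`) that are not part of the patch; it omits the memset-extension check and assumes a target whose largest legal integer is 32 bits:

```cpp
#include <cstdio>

// NumStores stores covering Bytes contiguous bytes; MaxIntBytes is the
// largest legal integer size in bytes (4 in this example).
static bool profitableAsMemset(unsigned NumStores, unsigned Bytes,
                               unsigned MaxIntBytes) {
  if (NumStores >= 4 || Bytes >= 16) // enough stores or bytes: always merge
    return true;
  if (NumStores < 2)                 // nothing to merge
    return false;
  if (NumStores == 2)                // codegen merges pairs on its own
    return false;
  if (MaxIntBytes == 0)
    MaxIntBytes = 1;
  unsigned NumPointerStores = Bytes / MaxIntBytes; // wide stores needed
  unsigned NumByteStores = Bytes % MaxIntBytes;    // leftover byte stores
  // Merge only if it reduces the total number of stores.
  return NumStores > NumPointerStores + NumByteStores;
}

int main() {
  std::printf("%d\n", profitableAsMemset(4, 4, 4)); // 1: >= 4 stores early exit
  std::printf("%d\n", profitableAsMemset(3, 3, 4)); // 0: 3 > 0 + 3 is false
}
```

With four stores the `>= 4` early exit already fires; the pointer/byte arithmetic only decides the three-store cases.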
@@ -197,7 +200,7 @@ class MemsetRanges {
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
MaybeAlign Alignment, Instruction *Inst) {
-int64_t End = Start+Size;
+int64_t End = Start + Size;

range_iterator I = partition_point(
Ranges, [=](const MemsetRange &O) { return O.End < Start; });
@@ -207,10 +210,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
// to insert a new range. Handle this now.
if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
-R.Start = Start;
-R.End = End;
-R.StartPtr = Ptr;
-R.Alignment = Alignment;
+R.Start = Start;
+R.End = End;
+R.StartPtr = Ptr;
+R.Alignment = Alignment;
R.TheStores.push_back(Inst);
return;
}
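For orientation on the `partition_point` call in `addRange` above: it locates the first existing range whose `End` is not strictly below the new interval's `Start`; when that range also starts after the new interval's `End`, the two cannot overlap and a fresh range is inserted. A minimal standalone illustration, using `std::partition_point` and hypothetical `Range`/`findInsertPos` names in place of the pass's own types:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct Range { long Start, End; }; // half-open [Start, End), sorted by End

static std::vector<Range>::iterator
findInsertPos(std::vector<Range> &Ranges, long Start) {
  // Same predicate as the partition_point call in addRange().
  return std::partition_point(Ranges.begin(), Ranges.end(),
                              [=](const Range &O) { return O.End < Start; });
}

int main() {
  std::vector<Range> Ranges = {{0, 4}, {8, 12}};
  long Start = 5, End = 6; // a new one-byte store at offset 5
  auto I = findInsertPos(Ranges, Start);
  // I points at {8, 12}; End (6) < I->Start (8), so this takes the
  // "insert a new range" branch shown in the hunk above.
  if (I == Ranges.end() || End < I->Start)
    std::printf("no overlap with existing ranges; new range inserted\n");
}
```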
@@ -397,7 +400,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,

if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
-if (!NextStore->isSimple()) break;
+if (!NextStore->isSimple())
+break;

Value *StoredVal = NextStore->getValueOperand();

@@ -460,7 +464,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
for (const MemsetRange &Range : Ranges) {
-if (Range.TheStores.size() == 1) continue;
+if (Range.TheStores.size() == 1)
+continue;

// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(DL))
@@ -481,12 +486,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

-auto *NewDef =
-cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
-? MSSAU->createMemoryAccessBefore(
-AMemSet, nullptr, MemInsertPoint)
-: MSSAU->createMemoryAccessAfter(
-AMemSet, nullptr, MemInsertPoint));
+auto *NewDef = cast<MemoryDef>(
+MemInsertPoint->getMemoryInst() == &*BI
+? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
+: MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
MSSAU->insertDef(NewDef, /*RenameUses=*/true);
MemInsertPoint = NewDef;

@@ -512,12 +515,13 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {

// Keep track of the arguments of all instruction we plan to lift
// so we can make sure to lift them as well if appropriate.
-DenseSet<Instruction*> Args;
+DenseSet<Instruction *> Args;
auto AddArg = [&](Value *Arg) {
auto *I = dyn_cast<Instruction>(Arg);
if (I && I->getParent() == SI->getParent()) {
// Cannot hoist user of P above P
-if (I == P) return false;
+if (I == P)
+return false;
Args.insert(I);
}
return true;
@@ -630,8 +634,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
const DataLayout &DL,
BasicBlock::iterator &BBI) {
-if (!LI->isSimple() || !LI->hasOneUse() ||
-LI->getParent() != SI->getParent())
+if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
return false;

auto *T = LI->getType();
@@ -678,21 +681,20 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
UseMemMove = true;

IRBuilder<> Builder(P);
-Value *Size = Builder.CreateTypeSize(Builder.getInt64Ty(),
-DL.getTypeStoreSize(T));
+Value *Size =
+Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
Instruction *M;
if (UseMemMove)
-M = Builder.CreateMemMove(
-SI->getPointerOperand(), SI->getAlign(),
-LI->getPointerOperand(), LI->getAlign(), Size);
+M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
+LI->getPointerOperand(), LI->getAlign(),
+Size);
else
-M = Builder.CreateMemCpy(
-SI->getPointerOperand(), SI->getAlign(),
-LI->getPointerOperand(), LI->getAlign(), Size);
+M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
+LI->getPointerOperand(), LI->getAlign(), Size);
M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

-LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
-<< *M << "\n");
+LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
+<< "\n");

auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
@@ -755,7 +757,8 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
}

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
-if (!SI->isSimple()) return false;
+if (!SI->isSimple())
+return false;

// Avoid merging nontemporal stores since the resulting
// memcpy/memset would not be able to preserve the nontemporal hint.
@@ -794,8 +797,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// 0xA0A0A0A0 and 0.0.
auto *V = SI->getOperand(0);
if (Value *ByteVal = isBytewiseValue(V, DL)) {
-if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
-ByteVal)) {
+if (Instruction *I =
+tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -816,8 +819,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// The newly inserted memset is immediately overwritten by the original
// store, so we do not need to rename uses.
auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
-auto *NewAccess = MSSAU->createMemoryAccessBefore(
-M, nullptr, StoreDef);
+auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

eraseInstruction(SI);
@@ -836,8 +838,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
// See if there is another memset or store neighboring this memset which
// allows us to widen out the memset to do a single larger store.
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
-if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
-MSI->getValue())) {
+if (Instruction *I =
+tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -850,7 +852,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, TypeSize cpySize,
-Align cpyDestAlign, BatchAAResults &BAA,
+Align cpyDestAlign,
+BatchAAResults &BAA,
std::function<CallInst *()> GetC) {
// The general transformation to keep in mind is
//
@@ -898,15 +901,15 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
return false;

-
if (C->getParent() != cpyStore->getParent()) {
LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
return false;
}

-MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
-MemoryLocation::get(cpyStore) :
-MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
+MemoryLocation DestLoc =
+isa<StoreInst>(cpyStore)
+? MemoryLocation::get(cpyStore)
+: MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));

// Check that nothing touches the dest of the copy between
// the call and the store/memcpy.
@@ -1175,7 +1178,8 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,

// If all checks passed, then we can transform M.
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
-<< *MDep << '\n' << *M << '\n');
+<< *MDep << '\n'
+<< *M << '\n');

// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
@@ -1307,8 +1311,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// memcpy's defining access is the memset about to be removed.
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
-auto *NewAccess = MSSAU->createMemoryAccessBefore(
-NewMemSet, nullptr, LastDef);
+auto *NewAccess =
+MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

eraseInstruction(MemSet);
@@ -1384,7 +1388,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return false;

// A known memcpy size is also required.
-auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
+auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
if (!CCopySize)
return false;
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
@@ -1655,7 +1659,8 @@ static bool isZeroSize(Value *Size) {
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// We can only optimize non-volatile memcpy's.
-if (M->isVolatile()) return false;
+if (M->isVolatile())
+return false;

// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
@@ -1796,11 +1801,10 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
<< "\n");

// If not, then we know we can transform this.
-Type *ArgTys[3] = { M->getRawDest()->getType(),
-M->getRawSource()->getType(),
-M->getLength()->getType() };
-M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
-Intrinsic::memcpy, ArgTys));
+Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
+M->getLength()->getType()};
+M->setCalledFunction(
+Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));

// For MemorySSA nothing really changes (except that memcpy may imply stricter
// aliasing guarantees).
@@ -1843,7 +1847,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Get the alignment of the byval. If the call doesn't specify the alignment,
// then it is some target specific value that we can't know.
MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
-if (!ByValAlign) return false;
+if (!ByValAlign)
+return false;

// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
@@ -1987,7 +1992,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
continue;

for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
-// Avoid invalidating the iterator.
+// Avoid invalidating the iterator.
Instruction *I = &*BI++;

bool RepeatInstruction = false;