Skip to content

Commit 7f6360c

Browse files
Reland: Expand existing loopsink testing to also test loopsinking using new pass manager and fix LICM bug.
Summary: Expand existing loopsink testing to also test loopsinking using new pass manager. Enable memoryssa for loopsink with new pass manager. This combination exposed a bug that was previously fixed for loopsink without memoryssa. When sinking an instruction into a loop, the source block may not be part of the loop but still needs to be checked for pointer invalidation. This is the fix for bugzilla #39695 (PR 54659) expanded to also work with memoryssa. Respond to review comments. Enable Memory SSA in legacy Loop Sink pass under EnableMSSALoopDependency option control. Update tests accordingly. Respond to review comments. Add options controlling whether memoryssa is used for loop sink, defaulting to off. Expand testing based on these options. Respond to review comments. Properly indicated preserved analyses. This relanding addresses a compile-time performance problem by moving test for profile data earlier to avoid unnecessary computations. Author: Jamie Schmeiser <[email protected]> Reviewed By: asbirlea (Alina Sbirlea) Differential Revision: https://reviews.llvm.org/D90249
1 parent 7ae3464 commit 7f6360c

File tree

6 files changed

+153
-36
lines changed

6 files changed

+153
-36
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,10 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
163163
AliasSetTracker *CurAST, Loop *CurLoop,
164164
AAResults *AA);
165165
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
166-
Loop *CurLoop,
166+
Loop *CurLoop, Instruction &I,
167167
SinkAndHoistLICMFlags &Flags);
168+
static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
169+
MemoryUse &MU);
168170
static Instruction *cloneInstructionInExitBlock(
169171
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
170172
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -1155,7 +1157,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
11551157
CurLoop, AA);
11561158
else
11571159
Invalidated = pointerInvalidatedByLoopWithMSSA(
1158-
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
1160+
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags);
11591161
// Check loop-invariant address because this may also be a sinkable load
11601162
// whose address is not necessarily loop-invariant.
11611163
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1211,7 +1213,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
12111213
CurAST, CurLoop, AA);
12121214
else
12131215
Invalidated = pointerInvalidatedByLoopWithMSSA(
1214-
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
1216+
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
12151217
*Flags);
12161218
if (Invalidated)
12171219
return false;
@@ -1312,7 +1314,6 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
13121314
}
13131315
}
13141316
}
1315-
13161317
auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
13171318
Flags->incrementClobberingCalls();
13181319
// If there are no clobbering Defs in the loop, store is safe to hoist.
@@ -2285,7 +2286,7 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
22852286
}
22862287

22872288
bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
2288-
Loop *CurLoop,
2289+
Loop *CurLoop, Instruction &I,
22892290
SinkAndHoistLICMFlags &Flags) {
22902291
// For hoisting, use the walker to determine safety
22912292
if (!Flags.getIsSink()) {
@@ -2321,12 +2322,22 @@ bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
23212322
if (Flags.tooManyMemoryAccesses())
23222323
return true;
23232324
for (auto *BB : CurLoop->getBlocks())
2324-
if (auto *Accesses = MSSA->getBlockDefs(BB))
2325-
for (const auto &MA : *Accesses)
2326-
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2327-
if (MU->getBlock() != MD->getBlock() ||
2328-
!MSSA->locallyDominates(MD, MU))
2329-
return true;
2325+
if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU))
2326+
return true;
2327+
// When sinking, the source block may not be part of the loop so check it.
2328+
if (!CurLoop->contains(&I))
2329+
return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU);
2330+
2331+
return false;
2332+
}
2333+
2334+
bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA,
2335+
MemoryUse &MU) {
2336+
if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2337+
for (const auto &MA : *Accesses)
2338+
if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2339+
if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2340+
return true;
23302341
return false;
23312342
}
23322343

llvm/lib/Transforms/Scalar/LoopSink.cpp

Lines changed: 120 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
#include "llvm/Analysis/Loads.h"
4040
#include "llvm/Analysis/LoopInfo.h"
4141
#include "llvm/Analysis/LoopPass.h"
42+
#include "llvm/Analysis/MemorySSA.h"
43+
#include "llvm/Analysis/MemorySSAUpdater.h"
4244
#include "llvm/Analysis/ScalarEvolution.h"
4345
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
4446
#include "llvm/IR/Dominators.h"
@@ -67,6 +69,14 @@ static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
6769
"max-uses-for-sinking", cl::Hidden, cl::init(30),
6870
cl::desc("Do not sink instructions that have too many uses."));
6971

72+
static cl::opt<bool> EnableMSSAInLoopSink(
73+
"enable-mssa-in-loop-sink", cl::Hidden, cl::init(false),
74+
cl::desc("Enable MemorySSA for LoopSink in new pass manager"));
75+
76+
static cl::opt<bool> EnableMSSAInLegacyLoopSink(
77+
"enable-mssa-in-legacy-loop-sink", cl::Hidden, cl::init(false),
78+
cl::desc("Enable MemorySSA for LoopSink in legacy pass manager"));
79+
7080
/// Return adjusted total frequency of \p BBs.
7181
///
7282
/// * If there is only one BB, sinking instruction will not introduce code
@@ -172,11 +182,10 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
172182
// sinking is successful.
173183
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
174184
// determinism.
175-
static bool sinkInstruction(Loop &L, Instruction &I,
176-
const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
177-
const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
178-
LoopInfo &LI, DominatorTree &DT,
179-
BlockFrequencyInfo &BFI) {
185+
static bool sinkInstruction(
186+
Loop &L, Instruction &I, const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
187+
const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber, LoopInfo &LI,
188+
DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSAUpdater *MSSAU) {
180189
// Compute the set of blocks in loop L which contain a use of I.
181190
SmallPtrSet<BasicBlock *, 2> BBs;
182191
for (auto &U : I.uses()) {
@@ -230,6 +239,21 @@ static bool sinkInstruction(Loop &L, Instruction &I,
230239
Instruction *IC = I.clone();
231240
IC->setName(I.getName());
232241
IC->insertBefore(&*N->getFirstInsertionPt());
242+
243+
if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
244+
// Create a new MemoryAccess and let MemorySSA set its defining access.
245+
MemoryAccess *NewMemAcc =
246+
MSSAU->createMemoryAccessInBB(IC, nullptr, N, MemorySSA::Beginning);
247+
if (NewMemAcc) {
248+
if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
249+
MSSAU->insertDef(MemDef, /*RenameUses=*/true);
250+
else {
251+
auto *MemUse = cast<MemoryUse>(NewMemAcc);
252+
MSSAU->insertUse(MemUse, /*RenameUses=*/true);
253+
}
254+
}
255+
}
256+
233257
// Replaces uses of I with IC in N
234258
I.replaceUsesWithIf(IC, [N](Use &U) {
235259
return cast<Instruction>(U.getUser())->getParent() == N;
@@ -244,6 +268,11 @@ static bool sinkInstruction(Loop &L, Instruction &I,
244268
NumLoopSunk++;
245269
I.moveBefore(&*MoveBB->getFirstInsertionPt());
246270

271+
if (MSSAU)
272+
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
273+
MSSAU->getMemorySSA()->getMemoryAccess(&I)))
274+
MSSAU->moveToPlace(OldMemAcc, MoveBB, MemorySSA::Beginning);
275+
247276
return true;
248277
}
249278

@@ -252,15 +281,14 @@ static bool sinkInstruction(Loop &L, Instruction &I,
252281
static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
253282
DominatorTree &DT,
254283
BlockFrequencyInfo &BFI,
255-
ScalarEvolution *SE) {
284+
ScalarEvolution *SE,
285+
AliasSetTracker *CurAST,
286+
MemorySSA *MSSA) {
256287
BasicBlock *Preheader = L.getLoopPreheader();
257-
if (!Preheader)
258-
return false;
288+
assert(Preheader && "Expected loop to have preheader");
259289

260-
// Enable LoopSink only when runtime profile is available.
261-
// With static profile, the sinking decision may be sub-optimal.
262-
if (!Preheader->getParent()->hasProfileData())
263-
return false;
290+
assert(Preheader->getParent()->hasProfileData() &&
291+
"Unexpected call when profile data unavailable.");
264292

265293
const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
266294
// If there are no basic blocks with lower frequency than the preheader then
@@ -271,13 +299,15 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
271299
}))
272300
return false;
273301

274-
bool Changed = false;
275-
AliasSetTracker CurAST(AA);
302+
std::unique_ptr<MemorySSAUpdater> MSSAU;
303+
std::unique_ptr<SinkAndHoistLICMFlags> LICMFlags;
304+
if (MSSA) {
305+
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
306+
LICMFlags =
307+
std::make_unique<SinkAndHoistLICMFlags>(/*IsSink=*/true, &L, MSSA);
308+
}
276309

277-
// Compute alias set.
278-
for (BasicBlock *BB : L.blocks())
279-
CurAST.add(*BB);
280-
CurAST.add(*Preheader);
310+
bool Changed = false;
281311

282312
// Sort loop's basic blocks by frequency
283313
SmallVector<BasicBlock *, 10> ColdLoopBBs;
@@ -300,9 +330,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
300330
// No need to check for instruction's operands are loop invariant.
301331
assert(L.hasLoopInvariantOperands(I) &&
302332
"Insts in a loop's preheader should have loop invariant operands!");
303-
if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false))
333+
if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
334+
LICMFlags.get()))
304335
continue;
305-
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
336+
if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
337+
MSSAU.get()))
306338
Changed = true;
307339
}
308340

@@ -311,6 +343,13 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
311343
return Changed;
312344
}
313345

346+
static void computeAliasSet(Loop &L, BasicBlock &Preheader,
347+
AliasSetTracker &CurAST) {
348+
for (BasicBlock *BB : L.blocks())
349+
CurAST.add(*BB);
350+
CurAST.add(Preheader);
351+
}
352+
314353
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
315354
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
316355
// Nothing to do if there are no loops.
@@ -321,6 +360,10 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
321360
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
322361
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
323362

363+
MemorySSA *MSSA = EnableMSSAInLoopSink
364+
? &FAM.getResult<MemorySSAAnalysis>(F).getMSSA()
365+
: nullptr;
366+
324367
// We want to do a postorder walk over the loops. Since loops are a tree this
325368
// is equivalent to a reversed preorder walk and preorder is easy to compute
326369
// without recursion. Since we reverse the preorder, we will visit siblings
@@ -332,18 +375,42 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
332375
do {
333376
Loop &L = *PreorderLoops.pop_back_val();
334377

378+
BasicBlock *Preheader = L.getLoopPreheader();
379+
if (!Preheader)
380+
continue;
381+
382+
// Enable LoopSink only when runtime profile is available.
383+
// With static profile, the sinking decision may be sub-optimal.
384+
if (!Preheader->getParent()->hasProfileData())
385+
continue;
386+
387+
std::unique_ptr<AliasSetTracker> CurAST;
388+
if (!EnableMSSAInLoopSink) {
389+
CurAST = std::make_unique<AliasSetTracker>(AA);
390+
computeAliasSet(L, *Preheader, *CurAST.get());
391+
}
392+
335393
// Note that we don't pass SCEV here because it is only used to invalidate
336394
// loops in SCEV and we don't preserve (or request) SCEV at all making that
337395
// unnecessary.
338396
Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
339-
/*ScalarEvolution*/ nullptr);
397+
/*ScalarEvolution*/ nullptr,
398+
CurAST.get(), MSSA);
340399
} while (!PreorderLoops.empty());
341400

342401
if (!Changed)
343402
return PreservedAnalyses::all();
344403

345404
PreservedAnalyses PA;
346405
PA.preserveSet<CFGAnalyses>();
406+
407+
if (MSSA) {
408+
PA.preserve<MemorySSAAnalysis>();
409+
410+
if (VerifyMemorySSA)
411+
MSSA->verifyMemorySSA();
412+
}
413+
347414
return PA;
348415
}
349416

@@ -358,19 +425,46 @@ struct LegacyLoopSinkPass : public LoopPass {
358425
if (skipLoop(L))
359426
return false;
360427

428+
BasicBlock *Preheader = L->getLoopPreheader();
429+
if (!Preheader)
430+
return false;
431+
432+
// Enable LoopSink only when runtime profile is available.
433+
// With static profile, the sinking decision may be sub-optimal.
434+
if (!Preheader->getParent()->hasProfileData())
435+
return false;
436+
437+
AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
361438
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
362-
return sinkLoopInvariantInstructions(
363-
*L, getAnalysis<AAResultsWrapperPass>().getAAResults(),
364-
getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
439+
std::unique_ptr<AliasSetTracker> CurAST;
440+
MemorySSA *MSSA = nullptr;
441+
if (EnableMSSAInLegacyLoopSink)
442+
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
443+
else {
444+
CurAST = std::make_unique<AliasSetTracker>(AA);
445+
computeAliasSet(*L, *Preheader, *CurAST.get());
446+
}
447+
448+
bool Changed = sinkLoopInvariantInstructions(
449+
*L, AA, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
365450
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
366451
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
367-
SE ? &SE->getSE() : nullptr);
452+
SE ? &SE->getSE() : nullptr, CurAST.get(), MSSA);
453+
454+
if (MSSA && VerifyMemorySSA)
455+
MSSA->verifyMemorySSA();
456+
457+
return Changed;
368458
}
369459

370460
void getAnalysisUsage(AnalysisUsage &AU) const override {
371461
AU.setPreservesCFG();
372462
AU.addRequired<BlockFrequencyInfoWrapperPass>();
373463
getLoopAnalysisUsage(AU);
464+
if (EnableMSSAInLegacyLoopSink) {
465+
AU.addRequired<MemorySSAWrapperPass>();
466+
AU.addPreserved<MemorySSAWrapperPass>();
467+
}
374468
}
375469
};
376470
}
@@ -380,6 +474,7 @@ INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
380474
false)
381475
INITIALIZE_PASS_DEPENDENCY(LoopPass)
382476
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
477+
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
383478
INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
384479

385480
Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }

llvm/test/Transforms/LICM/loopsink-pr38462.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
; RUN: opt -S -loop-sink < %s | FileCheck %s
2+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
3+
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
4+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
25

36
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
47
target triple = "x86_64-pc-windows-msvc19.13.26128"

llvm/test/Transforms/LICM/loopsink-pr39570.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
; RUN: opt -S -loop-sink < %s | FileCheck %s
2+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
3+
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
4+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
25

36
; CHECK: pr39570
47
; Make sure not to assert.

llvm/test/Transforms/LICM/loopsink-pr39695.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
; RUN: opt -S -loop-sink < %s | FileCheck %s
2+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
3+
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
4+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
25

36
; The load instruction should not be sunk into following loop.
47
; CHECK: @foo

llvm/test/Transforms/LICM/loopsink.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; RUN: opt -S -loop-sink < %s | FileCheck %s
2+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-legacy-loop-sink -loop-sink < %s | FileCheck %s
23
; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
4+
; RUN: opt -S -verify-memoryssa -enable-mssa-in-loop-sink -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
35

46
@g = global i32 0, align 4
57

0 commit comments

Comments
 (0)