Skip to content

Commit e77378c

Browse files
authored
[Matrix] Adjust lifetime.ends during multiply fusion. (llvm#84914)
At the moment, loads introduced by multiply fusion may be placed after an object's lifetime has been terminated by lifetime.end. This introduces reads of dead objects. To avoid this, first collect all lifetime.end calls in the function. During fusion, we deal with any lifetime.end calls that may alias any of the loads. Such lifetime.end calls are either moved when possible (both the lifetime.end and the store are in the same block) or deleted. PR: llvm#84914
1 parent 0847c90 commit e77378c

File tree

2 files changed

+527
-18
lines changed

2 files changed

+527
-18
lines changed

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
2121
#include "llvm/ADT/PostOrderIterator.h"
22+
#include "llvm/ADT/ScopeExit.h"
2223
#include "llvm/ADT/SmallSet.h"
2324
#include "llvm/ADT/SmallVector.h"
2425
#include "llvm/Analysis/AliasAnalysis.h"
@@ -990,12 +991,15 @@ class LowerMatrixIntrinsics {
990991
bool Changed = false;
991992
SmallVector<CallInst *, 16> MaybeFusableInsts;
992993
SmallVector<Instruction *, 16> MatrixInsts;
994+
SmallVector<IntrinsicInst *, 16> LifetimeEnds;
993995

994996
// First, collect all instructions with shape information and candidates for
995997
// fusion (currently only matrix multiplies).
996998
ReversePostOrderTraversal<Function *> RPOT(&Func);
997999
for (auto *BB : RPOT)
9981000
for (Instruction &I : *BB) {
1001+
if (match(&I, m_Intrinsic<Intrinsic::lifetime_end>()))
1002+
LifetimeEnds.push_back(cast<IntrinsicInst>(&I));
9991003
if (ShapeMap.find(&I) == ShapeMap.end())
10001004
continue;
10011005
if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>()))
@@ -1010,7 +1014,7 @@ class LowerMatrixIntrinsics {
10101014

10111015
// Third, try to fuse candidates.
10121016
for (CallInst *CI : MaybeFusableInsts)
1013-
LowerMatrixMultiplyFused(CI, FusedInsts);
1017+
LowerMatrixMultiplyFused(CI, FusedInsts, LifetimeEnds);
10141018

10151019
Changed = !FusedInsts.empty();
10161020

@@ -1856,8 +1860,10 @@ class LowerMatrixIntrinsics {
18561860
///
18571861
/// Call finalizeLowering on lowered instructions. Instructions that are
18581862
/// completely eliminated by fusion are added to \p FusedInsts.
1859-
void LowerMatrixMultiplyFused(CallInst *MatMul,
1860-
SmallPtrSetImpl<Instruction *> &FusedInsts) {
1863+
void
1864+
LowerMatrixMultiplyFused(CallInst *MatMul,
1865+
SmallPtrSetImpl<Instruction *> &FusedInsts,
1866+
SmallVector<IntrinsicInst *, 16> &LifetimeEnds) {
18611867
if (!FuseMatrix || !DT)
18621868
return;
18631869

@@ -1946,6 +1952,55 @@ class LowerMatrixIntrinsics {
19461952
for (Instruction *I : ToHoist)
19471953
I->moveBefore(MatMul);
19481954

1955+
// Deal with lifetime.end calls that might be between Load0/Load1 and the
1956+
// store. To avoid introducing loads to dead objects (i.e. after the
1957+
// lifetime has been terminated by @llvm.lifetime.end), either sink them
1958+
// after the store if in the same block, or remove the lifetime.end marker
1959+
// otherwise. This might pessimize further optimizations, by extending the
1960+
// lifetime of the object until the function returns, but should be
1961+
// conservatively correct.
1962+
MemoryLocation Load0Loc = MemoryLocation::get(LoadOp0);
1963+
MemoryLocation Load1Loc = MemoryLocation::get(LoadOp1);
1964+
BasicBlock *StoreParent = Store->getParent();
1965+
bool FusableOpsInSameBlock = LoadOp0->getParent() == StoreParent &&
1966+
LoadOp1->getParent() == StoreParent;
1967+
for (unsigned Idx = 0; Idx != LifetimeEnds.size();) {
1968+
IntrinsicInst *End = LifetimeEnds[Idx];
1969+
auto Inc = make_scope_exit([&Idx]() { Idx++; });
1970+
// If the lifetime.end is guaranteed to be before the loads or after the
1971+
// store, it won't interfere with fusion.
1972+
if (DT->dominates(End, LoadOp0) && DT->dominates(End, LoadOp1))
1973+
continue;
1974+
if (DT->dominates(Store, End))
1975+
continue;
1976+
// If all fusable ops are in the same block and the lifetime.end is in a
1977+
// different block, it won't interfere with fusion.
1978+
if (FusableOpsInSameBlock && End->getParent() != StoreParent)
1979+
continue;
1980+
1981+
// If the loads don't alias the lifetime.end, it won't interfere with
1982+
// fusion.
1983+
MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr);
1984+
if (!EndLoc.Ptr)
1985+
continue;
1986+
if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
1987+
continue;
1988+
1989+
// If both lifetime.end and the store are in the same block, extend the
1990+
// lifetime until after the store, so the new lifetime covers the loads
1991+
// we introduce later.
1992+
if (End->getParent() == StoreParent) {
1993+
End->moveAfter(Store);
1994+
continue;
1995+
}
1996+
1997+
// Otherwise remove the conflicting lifetime.end marker.
1998+
ToRemove.push_back(End);
1999+
std::swap(LifetimeEnds[Idx], LifetimeEnds.back());
2000+
LifetimeEnds.pop_back();
2001+
Inc.release();
2002+
}
2003+
19492004
emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
19502005
return;
19512006
}

0 commit comments

Comments
 (0)