19
19
20
20
#include " llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
21
21
#include " llvm/ADT/PostOrderIterator.h"
22
+ #include " llvm/ADT/ScopeExit.h"
22
23
#include " llvm/ADT/SmallSet.h"
23
24
#include " llvm/ADT/SmallVector.h"
24
25
#include " llvm/Analysis/AliasAnalysis.h"
@@ -990,12 +991,15 @@ class LowerMatrixIntrinsics {
990
991
bool Changed = false ;
991
992
SmallVector<CallInst *, 16 > MaybeFusableInsts;
992
993
SmallVector<Instruction *, 16 > MatrixInsts;
994
+ SmallVector<IntrinsicInst *, 16 > LifetimeEnds;
993
995
994
996
// First, collect all instructions with shape information and candidates for
995
997
// fusion (currently only matrix multiplies).
996
998
ReversePostOrderTraversal<Function *> RPOT (&Func);
997
999
for (auto *BB : RPOT)
998
1000
for (Instruction &I : *BB) {
1001
+ if (match (&I, m_Intrinsic<Intrinsic::lifetime_end>()))
1002
+ LifetimeEnds.push_back (cast<IntrinsicInst>(&I));
999
1003
if (ShapeMap.find (&I) == ShapeMap.end ())
1000
1004
continue ;
1001
1005
if (match (&I, m_Intrinsic<Intrinsic::matrix_multiply>()))
@@ -1010,7 +1014,7 @@ class LowerMatrixIntrinsics {
1010
1014
1011
1015
// Third, try to fuse candidates.
1012
1016
for (CallInst *CI : MaybeFusableInsts)
1013
- LowerMatrixMultiplyFused (CI, FusedInsts);
1017
+ LowerMatrixMultiplyFused (CI, FusedInsts, LifetimeEnds );
1014
1018
1015
1019
Changed = !FusedInsts.empty ();
1016
1020
@@ -1856,8 +1860,10 @@ class LowerMatrixIntrinsics {
1856
1860
///
1857
1861
/// Call finalizeLowering on lowered instructions. Instructions that are
1858
1862
/// completely eliminated by fusion are added to \p FusedInsts.
1859
- void LowerMatrixMultiplyFused (CallInst *MatMul,
1860
- SmallPtrSetImpl<Instruction *> &FusedInsts) {
1863
+ void
1864
+ LowerMatrixMultiplyFused (CallInst *MatMul,
1865
+ SmallPtrSetImpl<Instruction *> &FusedInsts,
1866
+ SmallVector<IntrinsicInst *, 16 > &LifetimeEnds) {
1861
1867
if (!FuseMatrix || !DT)
1862
1868
return ;
1863
1869
@@ -1946,6 +1952,55 @@ class LowerMatrixIntrinsics {
1946
1952
for (Instruction *I : ToHoist)
1947
1953
I->moveBefore (MatMul);
1948
1954
1955
+ // Deal with lifetime.end calls that might be between Load0/Load1 and the
1956
+ // store. To avoid introducing loads to dead objects (i.e. after the
1957
+ // lifetime has been terminated by @llvm.lifetime.end), either sink them
1958
+ // after the store if in the same block, or remove the lifetime.end marker
1959
+ // otherwise. This might pessimize further optimizations, by extending the
1960
+ // lifetime of the object until the function returns, but should be
1961
+ // conservatively correct.
1962
+ MemoryLocation Load0Loc = MemoryLocation::get (LoadOp0);
1963
+ MemoryLocation Load1Loc = MemoryLocation::get (LoadOp1);
1964
+ BasicBlock *StoreParent = Store->getParent ();
1965
+ bool FusableOpsInSameBlock = LoadOp0->getParent () == StoreParent &&
1966
+ LoadOp1->getParent () == StoreParent;
1967
+ for (unsigned Idx = 0 ; Idx != LifetimeEnds.size ();) {
1968
+ IntrinsicInst *End = LifetimeEnds[Idx];
1969
+ auto Inc = make_scope_exit ([&Idx]() { Idx++; });
1970
+ // If the lifetime.end is guaranteed to be before the loads or after the
1971
+ // store, it won't interfere with fusion.
1972
+ if (DT->dominates (End, LoadOp0) && DT->dominates (End, LoadOp1))
1973
+ continue ;
1974
+ if (DT->dominates (Store, End))
1975
+ continue ;
1976
+ // If all fusable ops are in the same block and the lifetime.end is in a
1977
+ // different block, it won't interfere with fusion.
1978
+ if (FusableOpsInSameBlock && End->getParent () != StoreParent)
1979
+ continue ;
1980
+
1981
+ // If the loads don't alias the lifetime.end, it won't interfere with
1982
+ // fusion.
1983
+ MemoryLocation EndLoc = MemoryLocation::getForArgument (End, 1 , nullptr );
1984
+ if (!EndLoc.Ptr )
1985
+ continue ;
1986
+ if (AA->isNoAlias (Load0Loc, EndLoc) && AA->isNoAlias (Load1Loc, EndLoc))
1987
+ continue ;
1988
+
1989
+ // If both lifetime.end and the store are in the same block, extend the
1990
+ // lifetime until after the store, so the new lifetime covers the loads
1991
+ // we introduce later.
1992
+ if (End->getParent () == StoreParent) {
1993
+ End->moveAfter (Store);
1994
+ continue ;
1995
+ }
1996
+
1997
+ // Otherwise remove the conflicting lifetime.end marker.
1998
+ ToRemove.push_back (End);
1999
+ std::swap (LifetimeEnds[Idx], LifetimeEnds.back ());
2000
+ LifetimeEnds.pop_back ();
2001
+ Inc.release ();
2002
+ }
2003
+
1949
2004
emitSIMDTiling (MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
1950
2005
return ;
1951
2006
}
0 commit comments