@@ -1527,6 +1527,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
 }

  // Check that copy is full with static size.
+ // TODO: use coversInputFully instead here
  const DataLayout &DL = DestAlloca->getDataLayout();
  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
  if (!SrcSize || Size != *SrcSize) {
@@ -1719,6 +1720,245 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
  return true;
}

+ // Check whether instruction I, together with any operands that would have to
+ // move with it, can be lifted to the position just after P.
+ // Returns true if lifting would succeed.
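+ // Illustrative sketch (hypothetical IR, not from a test): given
+ //   call void @clobber()          ; <- P
+ //   %p = getelementptr i8, ptr %src, i64 8
+ //   %v = load i64, ptr %p         ; <- I
+ // lifting %v to just after the call also requires lifting %p, since %p does
+ // not dominate P.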
+ bool MemCpyOptPass::canMoveUp(Instruction *I, Instruction *P) {
+   if (I->getIterator() == std::next(P->getIterator()))
+     return true;
+   // TODO: check isGuaranteedToTransferExecutionToSuccessor on all
+   // instructions between I and P
+   SmallSet<const Instruction *, 20> Visited;
+   SmallVector<Instruction *, 8> Worklist;
+   Worklist.push_back(I);
+   while (!Worklist.empty()) {
+     Instruction *I = Worklist.pop_back_val();
+     for (Value *Op : I->operands()) {
+       Instruction *U = dyn_cast<Instruction>(Op);
+       // (U dom P) is sufficient, but is it the necessary condition?
+       if (U && !DT->dominates(U, P)) {
+         // Cannot hoist if this has visible side effects or is a PHINode
+         // (which would require converting to a SelectInst in moveUp).
+         if (isa<PHINode>(U) || U->mayReadOrWriteMemory())
+           return false;
+         if (!Visited.insert(U).second)
+           continue; // Already examined this in another branch.
+         assert(DT->dominates(P, U)); // Ensure that SSA is correct.
+         Worklist.push_back(U);
+       }
+     }
+   }
+   return true;
+ }
+
+ // Unconditionally move I to just after P, including all of its operands.
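+ // The DFS emits operands in PostOrder before their users, so e.g. for
+ //   %p = getelementptr ...
+ //   %v = load i64, ptr %p
+ // %p is moved after P first, and %v is then moved after %p.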
+ void MemCpyOptPass::moveUp(Instruction *I, Instruction *P) {
+   if (I->getIterator() == std::next(P->getIterator()))
+     return;
+   // Compute a valid instruction order with a DFS walk.
+   SmallSet<const Instruction *, 20> Lifted;
+   SmallVector<Instruction *, 8> PreOrder;
+   SmallVector<Instruction *, 8> PostOrder;
+   PreOrder.push_back(I);
+   while (!PreOrder.empty()) {
+     Instruction *I = PreOrder.back();
+     if (Lifted.count(I)) {
+       // Already scheduled via another user; drop the duplicate entry so the
+       // walk makes progress.
+       PreOrder.pop_back();
+       continue;
+     }
+     for (Value *Op : I->operands()) {
+       Instruction *U = dyn_cast<Instruction>(Op);
+       if (U && !Lifted.count(U) && !DT->dominates(U, P)) {
+         assert(!isa<PHINode>(U));
+         PreOrder.push_back(U);
+       }
+     }
+     if (PreOrder.back() == I) {
+       // All operands scheduled, can now schedule this too.
+       Lifted.insert(I);
+       PreOrder.pop_back();
+       PostOrder.push_back(I);
+     }
+   }
+
+   MemoryUseOrDef *MemInsertPoint = MSSA->getMemoryAccess(P);
+   assert(MemInsertPoint);
+
+   // Now move them, keeping MemorySSA in sync.
+   for (Instruction *I : PostOrder) {
+     LLVM_DEBUG(dbgs() << "Lifting " << *I << " after " << *P << "\n");
+     if (I->getIterator() == std::next(P->getIterator())) {
+       P = I;
+       continue;
+     }
+     I->moveAfter(P);
+     P = I;
+     if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
+       MSSAU->moveAfter(MA, MemInsertPoint);
+       MemInsertPoint = MA;
+     }
+   }
+ }
+
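+ // Attempt to hoist all uses of DestAlloca above the memcpy M and replace
+ // them with direct uses of the memcpy source, eliminating the alloca.
+ // Illustrative sketch (hypothetical IR) of the intended rewrite:
+ //   %dst = alloca [32 x i8]
+ //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 false)
+ //   %v = load i64, ptr %dst
+ // becomes:
+ //   %v = load i64, ptr %src
+ // provided %dst never escapes and nothing clobbers %src in between.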
+ bool MemCpyOptPass::performStackHoistOptzn(MemCpyInst *M,
+                                            AllocaInst *DestAlloca,
+                                            TypeSize Size,
+                                            BatchAAResults &BAA) {
+   LLVM_DEBUG(dbgs() << "Stack Hoist: Attempting to optimize:\n" << *M << "\n");
+
+   // TODO: this should be fine as long as the only users are LoadInsts and
+   // GEPs, but we would need more effort to rewrite the GEP address spaces.
+   if (DestAlloca->getType() != M->getSource()->getType())
+     return false;
+
+   // Check that the copy is full-sized with a static size.
+   // TODO: use coversInputFully instead here
+   const DataLayout &DL = DestAlloca->getDataLayout();
+   std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+   if (!DestSize || Size != *DestSize) {
+     LLVM_DEBUG(dbgs() << "Stack Hoist: Destination alloca size mismatch\n");
+     return false;
+   }
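+   // E.g. an 8-byte memcpy into a 16-byte alloca must be rejected: reads of
+   // the remaining 8 bytes would have no counterpart in the source.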
+
+   MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+   if (!MA)
+     // Degenerate case: memcpy marked as not accessing memory.
+     return false;
+
+   // The source alignment must be greater than or equal to the alloca's
+   // alignment. If not, we check whether we can force the source of the
+   // memcpy to the alignment we need, and bail out if that fails.
+   // TODO: we don't have to bail out if we only have LoadInsts or other
+   // intrinsics (e.g. memcpy) and can instead reduce the alignment of the use.
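+   // E.g. an align-16 load of the alloca cannot simply be redirected to a
+   // source pointer that is only known to be align 8.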
+   Align MAlign = M->getSourceAlign().valueOrOne();
+   Align AllocaAlign = DestAlloca->getAlign();
+   if (MAlign < AllocaAlign &&
+       getOrEnforceKnownAlignment(M->getSource(), AllocaAlign, DL, M, AC, DT) <
+           AllocaAlign)
+     return false;
+
+   // Check that dest is a never-captured, unescaped alloca.
+   MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+   MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+   SmallVector<Instruction *, 4> LifetimeMarkers;
+   SmallVector<std::pair<Instruction *, bool>, 4> AAMetadataInstrs;
+
+   SmallVector<std::pair<Instruction *, bool>, 8> Worklist;
+   Worklist.push_back({DestAlloca, true});
+   unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
+   Worklist.reserve(MaxUsesToExplore);
+   SmallSet<const Instruction *, 20> Visited;
+   while (!Worklist.empty()) {
+     Instruction *I;
+     bool MayMove;
+     std::tie(I, MayMove) = Worklist.pop_back_val();
+     for (const Use &U : I->uses()) {
+       auto *UI = cast<Instruction>(U.getUser());
+       // We don't care about the memcpy itself.
+       if (UI == M)
+         continue;
+       if (UI->isLifetimeStartOrEnd()) {
+         LifetimeMarkers.push_back(UI);
+         continue;
+       }
+       if (Visited.size() >= MaxUsesToExplore) {
+         LLVM_DEBUG(
+             dbgs()
+             << "Stack Hoist: Exceeded max uses to see ModRef, bailing\n");
+         return false;
+       }
+       if (!Visited.insert(UI).second)
+         continue;
+       UseCaptureInfo CI(CaptureComponents::None, CaptureComponents::None);
+       for (const Use &Op : UI->operands()) {
+         auto UCI = DetermineUseCaptureKind(Op, DestAlloca);
+         CI.UseCC |= UCI.UseCC;
+         CI.ResultCC |= UCI.ResultCC;
+       }
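+       // A capturing use (e.g. storing the alloca's address to memory, or
+       // passing it to a callee that may retain it) makes the rewrite
+       // unsound, so give up.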
+       if (capturesAnything(CI.UseCC))
+         return false;
+       if (!DT->dominates(UI, M)) {
+         if (UI->mayWriteToMemory()) {
+           ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+           if (isModSet(Res))
+             return false;
+           MayMove = false;
+         }
+         // TODO: (U dom P) is sufficient, but is it the necessary condition?
+         if (UI->mayReadFromMemory()) {
+           ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+           if (isRefSet(Res)) {
+             if (!DT->dominates(M, UI))
+               // TODO: does writtenBetween care about dom, or just moveUp?
+               return false;
+             // If UI can modify SrcLoc, then we cannot alias it to DestLoc.
+             ModRefInfo SrcRes = BAA.getModRefInfo(UI, SrcLoc);
+             if (isModSet(SrcRes))
+               return false;
+             bool MustMoveUp = writtenBetween(MSSA, BAA, SrcLoc, MA,
+                                              MSSA->getMemoryAccess(UI));
+             if (MustMoveUp) {
+               // It is safe to move this read up as long as the memory it
+               // reads doesn't change between UI and M (such as only reading
+               // DestLoc with a LoadInst, or argmemonly), and UI
+               // post-dominates M (so we are guaranteed to eventually
+               // execute UI).
+               if (!MayMove)
+                 return false;
+               if (!isa<LoadInst>(UI))
+                 return false;
+               if (!PDT->dominates(UI, M))
+                 return false;
+             }
+             AAMetadataInstrs.push_back({UI, MustMoveUp});
+           }
+         }
+       }
+       if (capturesAnything(CI.ResultCC)) {
+         // If this instruction may capture something other than the alloca
+         // (such as a SelectInst) or have other side effects (such as a
+         // call), then we should not try to move it or the instructions that
+         // use it. (canMoveUp will later check this for the other operands
+         // too.)
+         if (MayMove)
+           MayMove = isa<GEPOperator>(UI);
+         Worklist.push_back({UI, MayMove});
+       }
+     }
+   }
+
+   // Nothing useful to do (the alloca is dead anyway and later passes will
+   // likely remove it). Exit early now to minimize spurious test changes.
+   // TODO: delete in a later PR
+   if (AAMetadataInstrs.empty())
+     return false;
+
+   // Check that all instructions we need to move will be able to move;
+   // otherwise it may not be worthwhile to move any of them.
+   for (auto &I : AAMetadataInstrs) {
+     if (I.second && !canMoveUp(I.first, M))
+       return false;
+   }
+
+   // Remove all lifetime markers before RAUW since they no longer apply.
+   for (Instruction *I : LifetimeMarkers)
+     eraseInstruction(I);
+
+   // As this transformation can cause memory accesses that didn't previously
+   // alias to begin to alias one another, we remove !alias.scope, !noalias,
+   // !tbaa and !tbaa_struct metadata from any uses of either alloca.
+   // This is conservative, but more precision (replacing with the source tags
+   // from M) doesn't seem worthwhile right now.
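+   // E.g. a load of DestAlloca tagged !noalias against the source's scope
+   // would, after RAUW, read the source directly; keeping that tag would be
+   // a miscompile.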
+   for (auto &U : AAMetadataInstrs) {
+     Instruction *I = U.first;
+     I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
+     I->setMetadata(LLVMContext::MD_noalias, nullptr);
+     I->setMetadata(LLVMContext::MD_tbaa, nullptr);
+     I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
+     if (U.second)
+       moveUp(I, M);
+   }
+   DestAlloca->replaceAllUsesWith(M->getSource());
+   eraseInstruction(DestAlloca);
+
+   return true;
+ }
+
 static bool isZeroSize(Value *Size) {
   if (auto *I = dyn_cast<Instruction>(Size))
     if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
@@ -1842,25 +2082,39 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
   }
 }

- // If the transfer is from a stack slot to a stack slot, then we may be able
- // to perform the stack-move optimization. See the comments in
- // performStackMoveOptzn() for more details.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
- if (!DestAlloca)
-   return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
- if (!SrcAlloca)
+ // The remaining transforms are mainly interesting only if we can eliminate
+ // some stack data.
+ if (!DestAlloca && !SrcAlloca)
    return false;
+
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
- if (Len == nullptr)
-   return false;
- if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
-                           TypeSize::getFixed(Len->getZExtValue()), BAA)) {
-   // Avoid invalidating the iterator.
-   BBI = M->getNextNonDebugInstruction()->getIterator();
-   eraseInstruction(M);
-   ++NumMemCpyInstr;
-   return true;
+ // If the transfer is from a stack slot to a stack slot, then we may be able
+ // to perform the stack-move optimization. See the comments in
+ // performStackMoveOptzn() for more details.
+ if (DestAlloca && SrcAlloca && Len) {
+   if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
+                             TypeSize::getFixed(Len->getZExtValue()), BAA)) {
+     // Avoid invalidating the iterator.
+     BBI = M->getNextNonDebugInstruction()->getIterator();
+     eraseInstruction(M);
+     ++NumMemCpyInstr;
+     return true;
+   }
+ }
+
+ if (DestAlloca && Len) {
+   // If the transfer is to a stack slot, see if we can instead hoist all the
+   // uses of the stack slot up to this point.
+   if (performStackHoistOptzn(M, DestAlloca,
+                              TypeSize::getFixed(Len->getZExtValue()), BAA)) {
+     // Avoid invalidating the iterator.
+     BBI = M->getNextNonDebugInstruction()->getIterator();
+     eraseInstruction(M);
+     ++NumMemCpyInstr;
+     return true;
+   }
  }

 return false;