Skip to content

Commit 6c93798

Browse files
committed
SILOptimizer: Add a new TempRValue optimization pass
This is a separate optimization that detects short-lived temporaries that can be eliminated. This is necessary now that SILGen no longer performs basic RValue forwarding in some cases. SR-5508: Performance regression in benchmarks caused by removing SILGen peephole for LoadExpr in +0 context
1 parent cfb2a87 commit 6c93798

File tree

4 files changed

+584
-0
lines changed

4 files changed

+584
-0
lines changed

include/swift/SILOptimizer/PassManager/Passes.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ IRGEN_PASS(LoadableByAddress, "loadable-address",
226226
"SIL Large Loadable type by-address lowering.")
227227
PASS(RemovePins, "remove-pins",
228228
"Remove SIL pin/unpin pairs")
229+
PASS(TempRValueOpt, "temp-rvalue-opt",
230+
"Remove short-lived immutable temporary copies")
229231
PASS(SideEffectsDumper, "side-effects-dump",
230232
"Print Side-Effect Information for all Functions")
231233
PASS(SILCleanup, "cleanup",

lib/SILOptimizer/PassManager/PassPipeline.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,9 @@ static void addPerfEarlyModulePassPipeline(SILPassPipelinePlan &P) {
306306
P.addDeadFunctionElimination();
307307
// Start by cloning functions from stdlib.
308308
P.addSILLinker();
309+
310+
// Cleanup after SILGen: remove trivial copies to temporaries.
311+
P.addTempRValueOpt();
309312
}
310313

311314
static void addHighLevelEarlyLoopOptPipeline(SILPassPipelinePlan &P) {

lib/SILOptimizer/Transforms/CopyForwarding.cpp

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "swift/SIL/SILArgument.h"
6262
#include "swift/SIL/SILBuilder.h"
6363
#include "swift/SIL/SILVisitor.h"
64+
#include "swift/SILOptimizer/Analysis/AliasAnalysis.h"
6465
#include "swift/SILOptimizer/Analysis/DominanceAnalysis.h"
6566
#include "swift/SILOptimizer/Analysis/PostOrderAnalysis.h"
6667
#include "swift/SILOptimizer/PassManager/Passes.h"
@@ -1324,10 +1325,244 @@ class CopyForwardingPass : public SILFunctionTransform
13241325
invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);
13251326
}
13261327
}
1328+
};
1329+
1330+
/// Temporary RValue Optimization
1331+
///
1332+
/// Peephole optimization to eliminate short-lived immutable temporary copies.
1333+
/// This handles a common pattern generated by SILGen where temporary RValues
1334+
/// are emitted as copies...
1335+
///
1336+
/// %temp = alloc_stack $T
1337+
/// copy_addr %src to [initialization] %temp : $*T
1338+
/// // no writes to %src and %temp
1339+
/// destroy_addr %temp : $*T
1340+
/// dealloc_stack %temp : $*T
1341+
///
1342+
/// This differs from the copy forwarding algorithm because it handles
1343+
/// copy source and dest lifetimes that are unavoidably overlapping. Instead,
1344+
/// it finds cases in which it is easy to determine that the source is
1345+
/// unmodified during the copy destination's lifetime. Thus, the destination can
1346+
/// be viewed as a short-lived "rvalue".
1347+
class TempRValueOptPass : public SILFunctionTransform
1348+
{
1349+
AliasAnalysis *AA = nullptr;
1350+
1351+
static bool collectLoads(SILInstruction *CurrentInst, SILInstruction *addr,
1352+
llvm::SmallPtrSetImpl<SILInstruction *> &loadInsts);
1353+
1354+
bool checkNoSourceModification(CopyAddrInst *copyInst,
1355+
const llvm::SmallPtrSetImpl<SILInstruction *> &useInsts);
13271356

1357+
bool tryOptimizeCopyIntoTemp(CopyAddrInst *copyInst);
1358+
1359+
void run() override;
13281360
};
1361+
1362+
/// The main entry point of the pass.
1363+
void TempRValueOptPass::run() {
1364+
DEBUG(llvm::dbgs() << "Copy Peephole in Func " << getFunction()->getName()
1365+
<< "\n");
1366+
1367+
AA = PM->getAnalysis<AliasAnalysis>();
1368+
bool Changed = false;
1369+
1370+
// Find all copy_addr instructions.
1371+
for (auto &BB : *getFunction()) {
1372+
auto II = BB.begin();
1373+
while (II != BB.end()) {
1374+
auto *CopyInst = dyn_cast<CopyAddrInst>(&*II);
1375+
1376+
if (CopyInst) {
1377+
// In case of success, this may delete instructions, but not the
1378+
// CopyInst itself.
1379+
Changed |= tryOptimizeCopyIntoTemp(CopyInst);
1380+
}
1381+
1382+
// Increment the instruction iterator here. We can't do it at the begin of
1383+
// the loop because the instruction after CopyInst might be deleted in
1384+
// in tryOptimizeCopyIntoTemp. We can't do it at the end of the loop
1385+
// because the CopyInst might be deleted in the following code.
1386+
++II;
1387+
1388+
// Remove identity copies which are a result of this optimization.
1389+
if (CopyInst && CopyInst->getSrc() == CopyInst->getDest() &&
1390+
// Identity copies cannot take the source. This check is just here
1391+
// to be on the safe side.
1392+
!CopyInst->isTakeOfSrc()) {
1393+
// This is either the CopyInst which just got optimized or it is a
1394+
// follow-up from an earlier iteration, where another copy_addr copied
1395+
// the temporary back to the source location.
1396+
CopyInst->eraseFromParent();
1397+
}
1398+
}
1399+
}
1400+
1401+
if (Changed) {
1402+
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
1403+
}
1404+
}
1405+
1406+
/// Transitively explore all data flow uses of the given \p address until
1407+
/// reaching a load or returning false.
1408+
bool TempRValueOptPass::
1409+
collectLoads(SILInstruction *user, SILInstruction *address,
1410+
llvm::SmallPtrSetImpl<SILInstruction *> &loadInsts) {
1411+
// All normal uses (loads) must be in the initialization block.
1412+
// (The destroy and dealloc are commonly in a different block though.)
1413+
if (user->getParent() != address->getParent())
1414+
return false;
1415+
1416+
// Only allow uses that cannot destroy their operand. We need to be sure
1417+
// that replacing all this temporary's uses with the copy source doesn't
1418+
// destroy the source. This way, we know that the destroy_addr instructions
1419+
// that we recorded cover all the temporary's lifetime termination points.
1420+
//
1421+
// Currently we whitelist address projections and loads.
1422+
//
1423+
// TODO: handle non-destructive projections of enums
1424+
// (unchecked_take_enum_data_addr of Optional is nondestructive.)
1425+
switch (user->getKind()) {
1426+
default:
1427+
DEBUG(llvm::dbgs() << " Temp use may write/destroy its source" << *user);
1428+
return false;
1429+
1430+
case ValueKind::StructElementAddrInst:
1431+
case ValueKind::TupleElementAddrInst:
1432+
// Transitively look through projections on stack addresses.
1433+
for (auto *useOper : user->getUses()) {
1434+
if (!collectLoads(useOper->getUser(), user, loadInsts))
1435+
return false;
1436+
}
1437+
return true;
1438+
1439+
case ValueKind::LoadInst:
1440+
case ValueKind::LoadBorrowInst:
1441+
// Loads are the end of the data flow chain. The users of the load can't
1442+
// access the temporary storage.
1443+
loadInsts.insert(user);
1444+
return true;
1445+
1446+
case ValueKind::CopyAddrInst: {
1447+
// copy_addr which read from the temporary are like loads.
1448+
// TODO: Handle copy_addr [take]. But this doesn't seem to be important.
1449+
auto *copyFromTmp = cast<CopyAddrInst>(user);
1450+
if (copyFromTmp->getDest() == address || copyFromTmp->isTakeOfSrc()) {
1451+
DEBUG(llvm::dbgs() << " Temp written or taken" << *user);
1452+
return false;
1453+
}
1454+
loadInsts.insert(user);
1455+
return true;
1456+
}
1457+
}
1458+
}
1459+
1460+
/// Checks if the copy's source can be modified within the temporary's lifetime.
1461+
///
1462+
/// Unfortunately, we cannot simply use the destroy points as the lifetime end,
1463+
/// because they can be in a different basic block (that's what SILGen
1464+
/// generates). Instead we guarantee that all normal uses are within the block
1465+
/// of the temporary and look for the last use, which effectively ends the
1466+
/// lifetime.
1467+
bool TempRValueOptPass::checkNoSourceModification(CopyAddrInst *copyInst,
1468+
const llvm::SmallPtrSetImpl<SILInstruction *> &useInsts) {
1469+
unsigned NumLoadsFound = 0;
1470+
auto iter = std::next(copyInst->getIterator());
1471+
// We already checked that the useful lifetime of the temporary ends in
1472+
// the initialization block.
1473+
auto iterEnd = copyInst->getParent()->end();
1474+
for (; iter != iterEnd; ++iter) {
1475+
SILInstruction *I = &*iter;
1476+
1477+
if (useInsts.count(I))
1478+
NumLoadsFound++;
1479+
1480+
// If this is the last use of the temp we are ok. After this point,
1481+
// modifications to the source don't matter anymore.
1482+
if (NumLoadsFound == useInsts.size())
1483+
return true;
1484+
1485+
if (AA->mayWriteToMemory(I, copyInst->getSrc())) {
1486+
DEBUG(llvm::dbgs() << " Source modified by" << *iter);
1487+
return false;
1488+
}
1489+
}
1490+
// For some reason, not all normal uses have been seen between the copy and
1491+
// the end of the initialization block. We should never reach here.
1492+
return false;
1493+
}
1494+
1495+
/// Tries to perform the temporary rvalue copy elimination for \p copyInst
1496+
bool TempRValueOptPass::tryOptimizeCopyIntoTemp(CopyAddrInst *copyInst) {
1497+
if (copyInst->isTakeOfSrc() || !copyInst->isInitializationOfDest())
1498+
return false;
1499+
1500+
auto *tempObj = dyn_cast<AllocStackInst>(copyInst->getDest());
1501+
if (!tempObj)
1502+
return false;
1503+
1504+
assert(tempObj != copyInst->getSrc() &&
1505+
"can't initialize temporary with itself");
1506+
1507+
// Scan all uses of the temporary storage (tempObj) to verify they all refer
1508+
// to the value initialized by this copy. It is sufficient to check that the
1509+
// only users that modify memory are the copy_addr [initialization] and
1510+
// destroy_addr.
1511+
llvm::SmallPtrSet<SILInstruction *, 8> loadInsts;
1512+
for (auto *useOper : tempObj->getUses()) {
1513+
SILInstruction *user = useOper->getUser();
1514+
1515+
if (user == copyInst)
1516+
continue;
1517+
1518+
// Destroys and deallocations are allowed to be in a different block.
1519+
if (isa<DestroyAddrInst>(user) || isa<DeallocStackInst>(user))
1520+
continue;
1521+
1522+
if (!collectLoads(user, tempObj, loadInsts))
1523+
return false;
1524+
}
1525+
1526+
// Check if the source is modified within the lifetime of the temporary.
1527+
if (!checkNoSourceModification(copyInst, loadInsts))
1528+
return false;
1529+
1530+
DEBUG(llvm::dbgs() << " Success: replace temp" << *tempObj);
1531+
1532+
// Do a "replaceAllUses" by either deleting the users or replacing them with
1533+
// the source address. Note: we must not delete the original copyInst because
1534+
// it would crash the instruction iteration in run(). Instead the copyInst
1535+
// gets identical Src and Dest operands.
1536+
while (!tempObj->use_empty()) {
1537+
Operand *use = *tempObj->use_begin();
1538+
SILInstruction *user = use->getUser();
1539+
switch (user->getKind()) {
1540+
case ValueKind::DestroyAddrInst:
1541+
case ValueKind::DeallocStackInst:
1542+
user->eraseFromParent();
1543+
break;
1544+
case ValueKind::CopyAddrInst:
1545+
case ValueKind::StructElementAddrInst:
1546+
case ValueKind::TupleElementAddrInst:
1547+
case ValueKind::LoadInst:
1548+
case ValueKind::LoadBorrowInst:
1549+
use->set(copyInst->getSrc());
1550+
break;
1551+
1552+
default:
1553+
llvm_unreachable("unhandled instruction");
1554+
}
1555+
}
1556+
tempObj->eraseFromParent();
1557+
return true;
1558+
}
1559+
13291560
} // end anonymous namespace
13301561

13311562
/// Allocates a new CopyForwardingPass and returns it as a SILTransform.
/// NOTE(review): the caller presumably takes ownership of the returned
/// object - confirm against the pass manager.
SILTransform *swift::createCopyForwarding() {
  SILTransform *Pass = new CopyForwardingPass();
  return Pass;
}
1565+
1566+
/// Allocates a new TempRValueOptPass and returns it as a SILTransform.
/// NOTE(review): the caller presumably takes ownership of the returned
/// object - confirm against the pass manager.
SILTransform *swift::createTempRValueOpt() {
  SILTransform *Pass = new TempRValueOptPass();
  return Pass;
}

0 commit comments

Comments
 (0)