@@ -1334,63 +1334,6 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
1334
1334
return true ;
1335
1335
}
1336
1336
1337
- // Return true if V0 and V1 are equivalent. This handles the obvious cases
1338
- // where V0 == V1 and V0 and V1 are both identical instructions, but also
1339
- // handles loads and stores with identical operands.
1340
- //
1341
- // Because determining if two memory instructions are equivalent
1342
- // depends on control flow, the \c At0 and \c At1 parameters specify a
1343
- // location for the query. This function is essentially answering the
1344
- // query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1
1345
- // equivalent?". In practice this means checking that moving V0 to At0
1346
- // doesn't cross any other memory instructions.
1347
- static bool areValuesTriviallySame (Value *V0, BasicBlock::const_iterator At0,
1348
- Value *V1, BasicBlock::const_iterator At1) {
1349
- if (V0 == V1)
1350
- return true ;
1351
-
1352
- // Also check for instructions that are identical but not pointer-identical.
1353
- // This can include load instructions that haven't been CSE'd.
1354
- if (!isa<Instruction>(V0) || !isa<Instruction>(V1))
1355
- return false ;
1356
- const auto *I0 = cast<Instruction>(V0);
1357
- const auto *I1 = cast<Instruction>(V1);
1358
- if (!I0->isIdenticalToWhenDefined (I1))
1359
- return false ;
1360
-
1361
- if (!I0->mayReadOrWriteMemory ())
1362
- return true ;
1363
-
1364
- // Instructions that may read or write memory have extra restrictions. We
1365
- // must ensure we don't treat %a and %b as equivalent in code such as:
1366
- //
1367
- // %a = load %x
1368
- // store %x, 1
1369
- // if (%c) {
1370
- // %b = load %x
1371
- // %d = add %b, 1
1372
- // } else {
1373
- // %d = add %a, 1
1374
- // }
1375
-
1376
- // Be conservative. We don't want to search the entire CFG between def
1377
- // and use; if the def isn't in the same block as the use just bail.
1378
- if (I0->getParent () != At0->getParent () ||
1379
- I1->getParent () != At1->getParent ())
1380
- return false ;
1381
-
1382
- // Again, be super conservative. Ideally we'd be able to query AliasAnalysis
1383
- // but we currently don't have that available.
1384
- auto WritesMemory = [](const Instruction &I) {
1385
- return I.mayReadOrWriteMemory ();
1386
- };
1387
- if (std::any_of (std::next (I0->getIterator ()), At0, WritesMemory))
1388
- return false ;
1389
- if (std::any_of (std::next (I1->getIterator ()), At1, WritesMemory))
1390
- return false ;
1391
- return true ;
1392
- }
1393
-
1394
1337
// Is it legal to place a variable in operand \c OpIdx of \c I?
1395
1338
// FIXME: This should be promoted to Instruction.
1396
1339
static bool canReplaceOperandWithVariable (const Instruction *I,
@@ -1425,21 +1368,14 @@ static bool canReplaceOperandWithVariable(const Instruction *I,
1425
1368
}
1426
1369
}
1427
1370
1428
- // All blocks in Blocks unconditionally jump to a common successor. Analyze
1429
- // the last non-terminator instruction in each block and return true if it would
1430
- // be possible to sink them into their successor, creating one common
1431
- // instruction instead. Set NumPHIsRequired to the number of PHI nodes that
1432
- // would need to be created during sinking.
1433
- static bool canSinkLastInstruction (ArrayRef<BasicBlock*> Blocks,
1434
- unsigned &NumPHIsRequired) {
1435
- SmallVector<Instruction*,4 > Insts;
1436
- for (auto *BB : Blocks) {
1437
- if (BB->getTerminator () == &BB->front ())
1438
- // Block was empty.
1439
- return false ;
1440
- Insts.push_back (BB->getTerminator ()->getPrevNode ());
1441
- }
1442
-
1371
+ // All instructions in Insts belong to different blocks that all unconditionally
1372
+ // branch to a common successor. Analyze each instruction and return true if it
1373
+ // would be possible to sink them into their successor, creating one common
1374
+ // instruction instead. For every value that would be required to be provided by
1375
+ // PHI node (because an operand varies in each input block), add to PHIOperands.
1376
+ static bool canSinkInstructions (
1377
+ ArrayRef<Instruction *> Insts,
1378
+ DenseMap<Instruction *, SmallVector<Value *, 4 >> &PHIOperands) {
1443
1379
// Prune out obviously bad instructions to move. Any non-store instruction
1444
1380
// must have exactly one use, and we check later that use is by a single,
1445
1381
// common PHI instruction in the successor.
@@ -1459,24 +1395,26 @@ static bool canSinkLastInstruction(ArrayRef<BasicBlock*> Blocks,
1459
1395
if (!I->isSameOperationAs (I0))
1460
1396
return false ;
1461
1397
1462
- // If this isn't a store, check the only user is a single PHI.
1398
+ // All instructions in Insts are known to be the same opcode. If they aren't
1399
+ // stores, check the only user of each is a PHI or in the same block as the
1400
+ // instruction, because if a user is in the same block as an instruction
1401
+ // we're contemplating sinking, it must already be determined to be sinkable.
1463
1402
if (!isa<StoreInst>(I0)) {
1464
1403
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin ());
1465
- if (!PNUse ||
1466
- ! all_of (Insts, [&PNUse]( const Instruction *I) {
1467
- return *I-> user_begin () == PNUse ;
1404
+ if (!all_of (Insts, [& PNUse]( const Instruction *I) -> bool {
1405
+ auto *U = cast<Instruction>(*I-> user_begin ());
1406
+ return U == PNUse || U-> getParent () == I-> getParent () ;
1468
1407
}))
1469
1408
return false ;
1470
1409
}
1471
1410
1472
- NumPHIsRequired = 0 ;
1473
1411
for (unsigned OI = 0 , OE = I0->getNumOperands (); OI != OE; ++OI) {
1474
1412
if (I0->getOperand (OI)->getType ()->isTokenTy ())
1475
1413
// Don't touch any operand of token type.
1476
1414
return false ;
1477
1415
auto SameAsI0 = [&I0, OI](const Instruction *I) {
1478
- return areValuesTriviallySame (I->getOperand (OI), I-> getIterator (),
1479
- I0 ->getOperand (OI), I0->getIterator () );
1416
+ assert (I->getNumOperands () == I0-> getNumOperands ());
1417
+ return I ->getOperand (OI) == I0->getOperand (OI );
1480
1418
};
1481
1419
if (!all_of (Insts, SameAsI0)) {
1482
1420
if (!canReplaceOperandWithVariable (I0, OI))
@@ -1487,7 +1425,8 @@ static bool canSinkLastInstruction(ArrayRef<BasicBlock*> Blocks,
1487
1425
// FIXME: if the call was *already* indirect, we should do this.
1488
1426
return false ;
1489
1427
}
1490
- ++NumPHIsRequired;
1428
+ for (auto *I : Insts)
1429
+ PHIOperands[I].push_back (I->getOperand (OI));
1491
1430
}
1492
1431
}
1493
1432
return true ;
@@ -1496,11 +1435,7 @@ static bool canSinkLastInstruction(ArrayRef<BasicBlock*> Blocks,
1496
1435
// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
1497
1436
// instruction of every block in Blocks to their common successor, commoning
1498
1437
// into one instruction.
1499
- static void sinkLastInstruction (ArrayRef<BasicBlock*> Blocks) {
1500
- unsigned Dummy;
1501
- (void )Dummy;
1502
- assert (canSinkLastInstruction (Blocks, Dummy) &&
1503
- " Must analyze before transforming!" );
1438
+ static bool sinkLastInstruction (ArrayRef<BasicBlock*> Blocks) {
1504
1439
auto *BBEnd = Blocks[0 ]->getTerminator ()->getSuccessor (0 );
1505
1440
1506
1441
// canSinkLastInstruction returning true guarantees that every block has at
@@ -1509,9 +1444,22 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1509
1444
for (auto *BB : Blocks)
1510
1445
Insts.push_back (BB->getTerminator ()->getPrevNode ());
1511
1446
1512
- // We don't need to do any checking here; canSinkLastInstruction should have
1513
- // done it all for us.
1447
+ // The only checking we need to do now is that all users of all instructions
1448
+ // are the same PHI node. canSinkLastInstruction should have checked this but
1449
+ // it is slightly over-aggressive - it gets confused by commutative instructions
1450
+ // so double-check it here.
1514
1451
Instruction *I0 = Insts.front ();
1452
+ if (!isa<StoreInst>(I0)) {
1453
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin ());
1454
+ if (!all_of (Insts, [&PNUse](const Instruction *I) -> bool {
1455
+ auto *U = cast<Instruction>(*I->user_begin ());
1456
+ return U == PNUse;
1457
+ }))
1458
+ return false ;
1459
+ }
1460
+
1461
+ // We don't need to do any more checking here; canSinkLastInstruction should
1462
+ // have done it all for us.
1515
1463
SmallVector<Value*, 4 > NewOperands;
1516
1464
for (unsigned O = 0 , E = I0->getNumOperands (); O != E; ++O) {
1517
1465
// This check is different to that in canSinkLastInstruction. There, we
@@ -1563,6 +1511,62 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1563
1511
for (auto *I : Insts)
1564
1512
if (I != I0)
1565
1513
I->eraseFromParent ();
1514
+
1515
+ return true ;
1516
+ }
1517
+
1518
+ namespace {
1519
+ // LockstepReverseIterator - Iterates through instructions
1520
+ // in a set of blocks in reverse order from the first non-terminator.
1521
+ // For example (assume all blocks have size n):
1522
+ // LockstepReverseIterator I([B1, B2, B3]);
1523
+ // *I-- = [B1[n], B2[n], B3[n]];
1524
+ // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1525
+ // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1526
+ // ...
1527
+ class LockstepReverseIterator {
1528
+ ArrayRef<BasicBlock*> Blocks;
1529
+ SmallVector<Instruction*,4 > Insts;
1530
+ bool Fail;
1531
+ public:
1532
+ LockstepReverseIterator (ArrayRef<BasicBlock*> Blocks) :
1533
+ Blocks (Blocks) {
1534
+ reset ();
1535
+ }
1536
+
1537
+ void reset () {
1538
+ Fail = false ;
1539
+ Insts.clear ();
1540
+ for (auto *BB : Blocks) {
1541
+ if (BB->size () <= 1 ) {
1542
+ // Block wasn't big enough
1543
+ Fail = true ;
1544
+ return ;
1545
+ }
1546
+ Insts.push_back (BB->getTerminator ()->getPrevNode ());
1547
+ }
1548
+ }
1549
+
1550
+ bool isValid () const {
1551
+ return !Fail;
1552
+ }
1553
+
1554
+ void operator -- () {
1555
+ if (Fail)
1556
+ return ;
1557
+ for (auto *&Inst : Insts) {
1558
+ if (Inst == &Inst->getParent ()->front ()) {
1559
+ Fail = true ;
1560
+ return ;
1561
+ }
1562
+ Inst = Inst->getPrevNode ();
1563
+ }
1564
+ }
1565
+
1566
+ ArrayRef<Instruction*> operator * () const {
1567
+ return Insts;
1568
+ }
1569
+ };
1566
1570
}
1567
1571
1568
1572
// / Given an unconditional branch that goes to BBEnd,
@@ -1573,6 +1577,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
1573
1577
assert (BI1->isUnconditional ());
1574
1578
BasicBlock *BBEnd = BI1->getSuccessor (0 );
1575
1579
1580
+ // We currently only support branch targets with two predecessors.
1581
+ // FIXME: this is an arbitrary restriction and should be lifted.
1576
1582
SmallVector<BasicBlock*,4 > Blocks;
1577
1583
for (auto *BB : predecessors (BBEnd))
1578
1584
Blocks.push_back (BB);
@@ -1584,12 +1590,65 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
1584
1590
return false ;
1585
1591
1586
1592
bool Changed = false ;
1587
- unsigned NumPHIsToInsert;
1588
- while (canSinkLastInstruction (Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1 ) {
1589
- sinkLastInstruction (Blocks);
1593
+
1594
+ // We take a two-step approach to tail sinking. First we scan from the end of
1595
+ // each block upwards in lockstep. If the n'th instruction from the end of each
1596
+ // block can be sunk, those instructions are added to ValuesToSink and we
1597
+ // carry on. If we can sink an instruction but need to PHI-merge some operands
1598
+ // (because they're not identical in each instruction) we add these to
1599
+ // PHIOperands.
1600
+ unsigned ScanIdx = 0 ;
1601
+ SmallPtrSet<Value*,4 > InstructionsToSink;
1602
+ DenseMap<Instruction*, SmallVector<Value*,4 >> PHIOperands;
1603
+ LockstepReverseIterator LRI (Blocks);
1604
+ while (LRI.isValid () &&
1605
+ canSinkInstructions (*LRI, PHIOperands)) {
1606
+ DEBUG (dbgs () << " SINK: instruction can be sunk: " << *(*LRI)[0 ] << " \n " );
1607
+ InstructionsToSink.insert ((*LRI).begin (), (*LRI).end ());
1608
+ ++ScanIdx;
1609
+ --LRI;
1610
+ }
1611
+
1612
+ // Now that we've analyzed all potential sinking candidates, perform the
1613
+ // actual sink. We iteratively sink the last non-terminator of the source
1614
+ // blocks into their common successor unless doing so would require too
1615
+ // many PHI instructions to be generated (currently only one PHI is allowed
1616
+ // per sunk instruction).
1617
+ //
1618
+ // We can use InstructionsToSink to discount values needing PHI-merging that will
1619
+ // actually be sunk in a later iteration. This allows us to be more
1620
+ // aggressive in what we sink. This does allow a false positive where we
1621
+ // sink presuming a later value will also be sunk, but stop half way through
1622
+ // and never actually sink it which means we produce more PHIs than intended.
1623
+ // This is unlikely in practice though.
1624
+ for (unsigned SinkIdx = 0 ; SinkIdx != ScanIdx; ++SinkIdx) {
1625
+ DEBUG (dbgs () << " SINK: Sink: "
1626
+ << *Blocks[0 ]->getTerminator ()->getPrevNode ()
1627
+ << " \n " );
1628
+
1629
+ // Because we've sunk every instruction in turn, the current instruction to
1630
+ // sink is always at index 0.
1631
+ LRI.reset ();
1632
+ unsigned NumPHIdValues = 0 ;
1633
+ for (auto *I : *LRI)
1634
+ for (auto *V : PHIOperands[I])
1635
+ if (InstructionsToSink.count (V) == 0 )
1636
+ ++NumPHIdValues;
1637
+ DEBUG (dbgs () << " SINK: #phid values: " << NumPHIdValues << " \n " );
1638
+ unsigned NumPHIInsts = NumPHIdValues / Blocks.size ();
1639
+ if (NumPHIdValues % Blocks.size () != 0 )
1640
+ NumPHIInsts++;
1641
+
1642
+ if (NumPHIInsts > 1 )
1643
+ // Too many PHIs would be created.
1644
+ break ;
1645
+
1646
+ if (!sinkLastInstruction (Blocks))
1647
+ return Changed;
1590
1648
NumSinkCommons++;
1591
1649
Changed = true ;
1592
1650
}
1651
+
1593
1652
return Changed;
1594
1653
}
1595
1654
0 commit comments