Skip to content

Commit 27fd0a7

Browse files
authored
Merge pull request #22032 from gottesmm/pr-fe2b0c7d51041aa5df7013240ef4456313b63f96
2 parents a12e981 + 25ed77e commit 27fd0a7

File tree

2 files changed

+590
-48
lines changed

2 files changed

+590
-48
lines changed

lib/SILOptimizer/Mandatory/PredictableMemOpt.cpp

Lines changed: 127 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,18 @@ struct AvailableValue {
244244
return {NewValue, SubElementNumber, InsertionPoints};
245245
}
246246

247+
/// Return an AvailableValue suitable for projecting sub-elements out of.
///
/// When the builder \p b has no ownership, or when Value's ownership kind is
/// already compatible with guaranteed ownership, this returns an
/// AvailableValue with the same value, sub-element number and insertion
/// points. Otherwise it emits a begin_borrow of Value at \p loc and returns an
/// AvailableValue wrapping that borrow, with the same sub-element number and
/// insertion points. This function does not emit the matching end_borrow.
AvailableValue emitBeginBorrow(SILBuilder &b, SILLocation loc) const {
248+
// If we do not have ownership or are already guaranteed, just return a copy
249+
// of our state.
250+
if (!b.hasOwnership() || Value.getOwnershipKind().isCompatibleWith(
251+
ValueOwnershipKind::Guaranteed)) {
252+
return {Value, SubElementNumber, InsertionPoints};
253+
}
254+
255+
// Otherwise, emit a begin_borrow of Value and return it as the new value.
256+
return {b.createBeginBorrow(loc, Value), SubElementNumber, InsertionPoints};
257+
}
258+
247259
void dump() const LLVM_ATTRIBUTE_USED;
248260
void print(llvm::raw_ostream &os) const;
249261

@@ -301,10 +313,17 @@ static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
301313
SILType EltTy = ValTy.getTupleElementType(EltNo);
302314
unsigned NumSubElt = getNumSubElements(EltTy, B.getModule());
303315
if (SubElementNumber < NumSubElt) {
304-
auto NewVal = Val.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
305-
return nonDestructivelyExtractSubElement(NewVal, B, Loc);
316+
auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
317+
auto NewVal =
318+
BorrowedVal.emitTupleExtract(B, Loc, EltNo, SubElementNumber);
319+
SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
320+
// If our original value wasn't guaranteed and we did actually perform a
321+
// borrow as a result, insert the end_borrow.
322+
if (BorrowedVal.getValue() != Val.getValue())
323+
B.createEndBorrow(Loc, BorrowedVal.getValue());
324+
return result;
306325
}
307-
326+
308327
SubElementNumber -= NumSubElt;
309328
}
310329

@@ -318,19 +337,32 @@ static SILValue nonDestructivelyExtractSubElement(const AvailableValue &Val,
318337
unsigned NumSubElt = getNumSubElements(fieldType, B.getModule());
319338

320339
if (SubElementNumber < NumSubElt) {
321-
auto NewVal = Val.emitStructExtract(B, Loc, D, SubElementNumber);
322-
return nonDestructivelyExtractSubElement(NewVal, B, Loc);
340+
auto BorrowedVal = Val.emitBeginBorrow(B, Loc);
341+
auto NewVal =
342+
BorrowedVal.emitStructExtract(B, Loc, D, SubElementNumber);
343+
SILValue result = nonDestructivelyExtractSubElement(NewVal, B, Loc);
344+
// If our original value wasn't guaranteed and we did actually perform a
345+
// borrow as a result, insert the end_borrow.
346+
if (BorrowedVal.getValue() != Val.getValue())
347+
B.createEndBorrow(Loc, BorrowedVal.getValue());
348+
return result;
323349
}
324350

325351
SubElementNumber -= NumSubElt;
326352

327353
}
328354
llvm_unreachable("Didn't find field");
329355
}
330-
331-
// Otherwise, we're down to a scalar.
356+
357+
// Otherwise, we're down to a scalar. If we have ownership enabled,
358+
// we return a copy. Otherwise, we can ignore ownership
359+
// issues. This is ok since in [ossa] we are going to eliminate a
360+
// load [copy] or a load [trivial], while in non-[ossa] SIL we will
361+
// be replacing unqualified loads.
332362
assert(SubElementNumber == 0 && "Miscalculation indexing subelements");
333-
return Val.getValue();
363+
if (!B.hasOwnership())
364+
return Val.getValue();
365+
return B.emitCopyValueOperation(Loc, Val.getValue());
334366
}
335367

336368
//===----------------------------------------------------------------------===//
@@ -429,29 +461,61 @@ SILValue AvailableValueAggregator::aggregateValues(SILType LoadTy,
429461
// aggregate. This is a super-common case for single-element structs, but is
430462
// also a general answer for arbitrary structs and tuples as well.
431463
SILValue
432-
AvailableValueAggregator::aggregateFullyAvailableValue(SILType LoadTy,
433-
unsigned FirstElt) {
434-
if (FirstElt >= AvailableValueList.size()) { // #Elements may be zero.
464+
AvailableValueAggregator::aggregateFullyAvailableValue(SILType loadTy,
465+
unsigned firstElt) {
466+
if (firstElt >= AvailableValueList.size()) { // #Elements may be zero.
435467
return SILValue();
436468
}
437469

438-
auto &FirstVal = AvailableValueList[FirstElt];
470+
auto &firstVal = AvailableValueList[firstElt];
439471

440472
// Make sure that the first element is available and is the correct type.
441-
if (!FirstVal || FirstVal.getType() != LoadTy)
473+
if (!firstVal || firstVal.getType() != loadTy)
442474
return SILValue();
443475

444476
// If the first element of this value is available, check that any extra
445477
// available values are from the same place as our first value.
446-
if (llvm::any_of(range(getNumSubElements(LoadTy, M)),
447-
[&](unsigned Index) -> bool {
448-
auto &Val = AvailableValueList[FirstElt + Index];
449-
return Val.getValue() != FirstVal.getValue() ||
450-
Val.getSubElementNumber() != Index;
478+
if (llvm::any_of(range(getNumSubElements(loadTy, M)),
479+
[&](unsigned index) -> bool {
480+
auto &val = AvailableValueList[firstElt + index];
481+
return val.getValue() != firstVal.getValue() ||
482+
val.getSubElementNumber() != index;
451483
}))
452484
return SILValue();
453485

454-
return FirstVal.getValue();
486+
// Ok, we know that all of our available values are parts of the same
487+
// value. Without ownership, we can just return the underlying first value.
488+
if (!B.hasOwnership())
489+
return firstVal.getValue();
490+
491+
// Otherwise, we need to put in a copy. This is because we only propagate along +1
492+
// values and we are eliminating a load [copy].
493+
ArrayRef<StoreInst *> insertPts = firstVal.getInsertionPoints();
494+
if (insertPts.size() == 1) {
495+
// Use the scope and location of the store at the insertion point.
496+
SILBuilderWithScope builder(insertPts[0]);
497+
SILLocation loc = insertPts[0]->getLoc();
498+
return builder.emitCopyValueOperation(loc, firstVal.getValue());
499+
}
500+
501+
// If we have multiple insertion points, put copies at each point and use the
502+
// SSA updater to get a value. The reason why this is safe is that we can only
503+
// have multiple insertion points if we are storing exactly the same value
504+
// implying that we can just copy firstVal at each insertion point.
505+
SILSSAUpdater updater(B.getModule());
506+
updater.Initialize(loadTy);
507+
for (auto *insertPt : firstVal.getInsertionPoints()) {
508+
// Use the scope and location of the store at the insertion point.
509+
SILBuilderWithScope builder(insertPt);
510+
SILLocation loc = insertPt->getLoc();
511+
SILValue eltVal = builder.emitCopyValueOperation(loc, firstVal.getValue());
512+
updater.AddAvailableValue(insertPt->getParent(), eltVal);
513+
}
514+
515+
// Finally, grab the value from the SSA updater.
516+
SILValue result = updater.GetValueInMiddleOfBlock(B.getInsertionBB());
517+
assert(result.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
518+
return result;
455519
}
456520

457521
SILValue AvailableValueAggregator::aggregateTupleSubElts(TupleType *TT,
@@ -511,8 +575,13 @@ SILValue AvailableValueAggregator::handlePrimitiveValue(SILType LoadTy,
511575

512576
// If the value is not available, load the value and update our use list.
513577
if (!Val) {
514-
auto *Load =
515-
B.createLoad(Loc, Address, LoadOwnershipQualifier::Unqualified);
578+
LoadInst *Load = ([&]() {
579+
if (B.hasOwnership()) {
580+
return B.createTrivialLoadOr(Loc, Address,
581+
LoadOwnershipQualifier::Copy);
582+
}
583+
return B.createLoad(Loc, Address, LoadOwnershipQualifier::Unqualified);
584+
}());
516585
Uses.emplace_back(Load, PMOUseKind::Load);
517586
return Load;
518587
}
@@ -527,6 +596,9 @@ SILValue AvailableValueAggregator::handlePrimitiveValue(SILType LoadTy,
527596
SILBuilderWithScope Builder(InsertPts[0]);
528597
SILLocation Loc = InsertPts[0]->getLoc();
529598
SILValue EltVal = nonDestructivelyExtractSubElement(Val, Builder, Loc);
599+
assert(
600+
!Builder.hasOwnership() ||
601+
EltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
530602
assert(EltVal->getType() == LoadTy && "Subelement types mismatch");
531603
return EltVal;
532604
}
@@ -540,11 +612,16 @@ SILValue AvailableValueAggregator::handlePrimitiveValue(SILType LoadTy,
540612
SILBuilderWithScope Builder(I);
541613
SILLocation Loc = I->getLoc();
542614
SILValue EltVal = nonDestructivelyExtractSubElement(Val, Builder, Loc);
615+
assert(
616+
!Builder.hasOwnership() ||
617+
EltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
543618
Updater.AddAvailableValue(I->getParent(), EltVal);
544619
}
545620

546621
// Finally, grab the value from the SSA updater.
547622
SILValue EltVal = Updater.GetValueInMiddleOfBlock(B.getInsertionBB());
623+
assert(!B.hasOwnership() ||
624+
EltVal.getOwnershipKind().isCompatibleWith(ValueOwnershipKind::Owned));
548625
assert(EltVal->getType() == LoadTy && "Subelement types mismatch");
549626
return EltVal;
550627
}
@@ -568,7 +645,7 @@ class AvailableValueDataflowContext {
568645
/// The set of uses that we are tracking. This is only here so we can update
569646
/// when exploding copy_addr. It would be great if we did not have to store
570647
/// this.
571-
llvm::SmallVectorImpl<PMOMemoryUse> &Uses;
648+
SmallVectorImpl<PMOMemoryUse> &Uses;
572649

573650
/// The set of blocks with local definitions.
574651
///
@@ -587,7 +664,7 @@ class AvailableValueDataflowContext {
587664
public:
588665
AvailableValueDataflowContext(AllocationInst *TheMemory,
589666
unsigned NumMemorySubElements,
590-
llvm::SmallVectorImpl<PMOMemoryUse> &Uses);
667+
SmallVectorImpl<PMOMemoryUse> &Uses);
591668

592669
/// Try to compute available values for "TheMemory" at the instruction \p
593670
/// StartingFrom. We only compute the values for set bits in \p
@@ -666,32 +743,33 @@ void AvailableValueDataflowContext::updateAvailableValues(
666743
assert(StartSubElt != ~0U && "Store within enum projection not handled");
667744
SILType ValTy = SI->getSrc()->getType();
668745

669-
for (unsigned i = 0, e = getNumSubElements(ValTy, getModule()); i != e;
670-
++i) {
746+
for (unsigned i : range(getNumSubElements(ValTy, getModule()))) {
671747
// If this element is not required, don't fill it in.
672748
if (!RequiredElts[StartSubElt+i]) continue;
673-
749+
750+
// This element is now provided.
751+
RequiredElts[StartSubElt + i] = false;
752+
674753
// If there is no result computed for this subelement, record it. If
675754
// there already is a result, check it for conflict. If there is no
676755
// conflict, then we're ok.
677756
auto &Entry = Result[StartSubElt+i];
678757
if (!Entry) {
679758
Entry = {SI->getSrc(), i, SI};
680-
} else {
681-
// TODO: This is /really/, /really/, conservative. This basically means
682-
// that if we do not have an identical store, we will not promote.
683-
if (Entry.getValue() != SI->getSrc() ||
684-
Entry.getSubElementNumber() != i) {
685-
ConflictingValues[StartSubElt + i] = true;
686-
} else {
687-
Entry.addInsertionPoint(SI);
688-
}
759+
continue;
689760
}
690761

691-
// This element is now provided.
692-
RequiredElts[StartSubElt+i] = false;
762+
// TODO: This is /really/, /really/, conservative. This basically means
763+
// that if we do not have an identical store, we will not promote.
764+
if (Entry.getValue() != SI->getSrc() ||
765+
Entry.getSubElementNumber() != i) {
766+
ConflictingValues[StartSubElt + i] = true;
767+
continue;
768+
}
769+
770+
Entry.addInsertionPoint(SI);
693771
}
694-
772+
695773
return;
696774
}
697775

@@ -704,8 +782,7 @@ void AvailableValueDataflowContext::updateAvailableValues(
704782
SILType ValTy = CAI->getDest()->getType();
705783

706784
bool AnyRequired = false;
707-
for (unsigned i = 0, e = getNumSubElements(ValTy, getModule()); i != e;
708-
++i) {
785+
for (unsigned i : range(getNumSubElements(ValTy, getModule()))) {
709786
// If this element is not required, don't fill it in.
710787
AnyRequired = RequiredElts[StartSubElt+i];
711788
if (AnyRequired) break;
@@ -1044,9 +1121,11 @@ class AllocOptimize {
10441121
/// instruction is loading from. If we can not optimize \p Inst, then just
10451122
/// return an empty SILValue.
10461123
static SILValue tryFindSrcAddrForLoad(SILInstruction *Inst) {
1047-
// We only handle load [copy], load [trivial] and copy_addr right now.
1124+
// We only handle load [copy], load [trivial], load and copy_addr right
1125+
// now. Notably we do not support load [take] when promoting loads.
10481126
if (auto *LI = dyn_cast<LoadInst>(Inst))
1049-
return LI->getOperand();
1127+
if (LI->getOwnershipQualifier() != LoadOwnershipQualifier::Take)
1128+
return LI->getOperand();
10501129

10511130
// If this is a CopyAddr, verify that the element type is loadable. If not,
10521131
// we can't explode to a load.
@@ -1161,19 +1240,19 @@ bool AllocOptimize::canPromoteDestroyAddr(
11611240
unsigned FirstElt = computeSubelement(Address, TheMemory);
11621241
assert(FirstElt != ~0U && "destroy within enum projection is not valid");
11631242
unsigned NumLoadSubElements = getNumSubElements(LoadTy, Module);
1164-
1165-
// Set up the bitvector of elements being demanded by the load.
1166-
SmallBitVector RequiredElts(NumMemorySubElements);
1167-
RequiredElts.set(FirstElt, FirstElt+NumLoadSubElements);
11681243

11691244
// Find out if we have any available values. If no bits are demanded, we
11701245
// trivially succeed. This can happen when there is a load of an empty struct.
11711246
if (NumLoadSubElements == 0)
11721247
return true;
11731248

1249+
// Set up the bitvector of elements being demanded by the load.
1250+
SmallBitVector RequiredElts(NumMemorySubElements);
1251+
RequiredElts.set(FirstElt, FirstElt + NumLoadSubElements);
1252+
11741253
// Compute our available values. If we do not have any available values,
11751254
// return false. We have nothing further to do.
1176-
llvm::SmallVector<AvailableValue, 8> TmpList;
1255+
SmallVector<AvailableValue, 8> TmpList;
11771256
TmpList.resize(NumMemorySubElements);
11781257
if (!DataflowContext.computeAvailableValues(DAI, FirstElt, NumLoadSubElements,
11791258
RequiredElts, TmpList))
@@ -1208,7 +1287,7 @@ void AllocOptimize::promoteDestroyAddr(
12081287
SILValue NewVal = Agg.aggregateValues(LoadTy, Address, FirstElt);
12091288

12101289
++NumDestroyAddrPromoted;
1211-
1290+
12121291
LLVM_DEBUG(llvm::dbgs() << " *** Promoting destroy_addr: " << *DAI << "\n");
12131292
LLVM_DEBUG(llvm::dbgs() << " To value: " << *NewVal << "\n");
12141293

0 commit comments

Comments
 (0)