Skip to content

Commit 3b4d131

Browse files
authored
Merge pull request #40291 from eeckstein/cow-opt-improvements
Array related optimization improvements
2 parents 9fc633e + 8e83d5f commit 3b4d131

33 files changed

+605
-114
lines changed

include/swift/SIL/InstructionUtils.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ namespace swift {
2626
/// nothing left to strip.
2727
SILValue getUnderlyingObject(SILValue V);
2828

29-
SILValue getUnderlyingObjectStopAtMarkDependence(SILValue V);
30-
3129
SILValue stripSinglePredecessorArgs(SILValue V);
3230

3331
/// Return the underlying SILValue after stripping off all casts from the

include/swift/SIL/SILBridging.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ BridgedInstruction SILBuilder_createBuiltinBinaryFunction(
237237
BridgedType operandType, BridgedType resultType, BridgedValueArray arguments);
238238
BridgedInstruction SILBuilder_createCondFail(BridgedInstruction insertionPoint,
239239
BridgedLocation loc, BridgedValue condition, BridgedStringRef messge);
240+
BridgedInstruction SILBuilder_createIntegerLiteral(BridgedInstruction insertionPoint,
241+
BridgedLocation loc, BridgedType type, SwiftInt value);
240242

241243
SWIFT_END_NULLABILITY_ANNOTATIONS
242244

include/swift/SIL/SILNodes.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ ABSTRACT_VALUE_AND_INST(SingleValueInstruction, ValueBase, SILInstruction)
469469
LiteralInst, None, DoesNotRelease)
470470
BRIDGED_SINGLE_VALUE_INST(GlobalValueInst, global_value,
471471
LiteralInst, None, DoesNotRelease)
472-
SINGLE_VALUE_INST(IntegerLiteralInst, integer_literal,
472+
BRIDGED_SINGLE_VALUE_INST(IntegerLiteralInst, integer_literal,
473473
LiteralInst, None, DoesNotRelease)
474474
SINGLE_VALUE_INST(FloatLiteralInst, float_literal,
475475
LiteralInst, None, DoesNotRelease)

include/swift/SILOptimizer/PassManager/Passes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ PASS(PruneVTables, "prune-vtables",
431431
"Mark class methods that do not require vtable dispatch")
432432
PASS_RANGE(AllPasses, AADumper, PruneVTables)
433433

434+
SWIFT_INSTRUCTION_PASS(BeginCOWMutationInst, "simplify-begin_cow_mutation")
434435
SWIFT_INSTRUCTION_PASS_WITH_LEGACY(GlobalValueInst, "simplify-global_value")
435436
SWIFT_INSTRUCTION_PASS_WITH_LEGACY(StrongRetainInst, "simplify-strong_retain")
436437
SWIFT_INSTRUCTION_PASS_WITH_LEGACY(StrongReleaseInst, "simplify-strong_release")

include/swift/SILOptimizer/Utils/LoadStoreOptUtils.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,21 +379,37 @@ class LSLocation : public LSBase {
379379
static void reduce(LSLocation Base, SILModule *Mod,
380380
TypeExpansionContext context, LSLocationList &Locs);
381381

382+
/// Gets the base address for `v`.
383+
/// If `stopAtImmutable` is true, the base address is only calculated up to
384+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
385+
/// Return the base address and true if such an immutable class projection
386+
/// is found.
387+
static std::pair<SILValue, bool>
388+
getBaseAddressOrObject(SILValue v, bool stopAtImmutable);
389+
382390
/// Enumerate the given Mem LSLocation.
383-
static void enumerateLSLocation(TypeExpansionContext context, SILModule *M,
391+
/// If `stopAtImmutable` is true, the base address is only calculated up to
392+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
393+
/// Returns true if it's an immutable location.
394+
static bool enumerateLSLocation(TypeExpansionContext context, SILModule *M,
384395
SILValue Mem,
385396
std::vector<LSLocation> &LSLocationVault,
386397
LSLocationIndexMap &LocToBit,
387398
LSLocationBaseMap &BaseToLoc,
388-
TypeExpansionAnalysis *TE);
399+
TypeExpansionAnalysis *TE,
400+
bool stopAtImmutable);
389401

390402
/// Enumerate all the locations in the function.
403+
/// If `stopAtImmutable` is true, the base addresses are only calculated up to
404+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
391405
static void enumerateLSLocations(SILFunction &F,
392406
std::vector<LSLocation> &LSLocationVault,
393407
LSLocationIndexMap &LocToBit,
394408
LSLocationBaseMap &BaseToLoc,
395409
TypeExpansionAnalysis *TE,
396-
std::pair<int, int> &LSCount);
410+
bool stopAtImmutable,
411+
int &numLoads, int &numStores,
412+
bool &immutableLoadsFound);
397413
};
398414

399415
static inline llvm::hash_code hash_value(const LSLocation &L) {

lib/SIL/Utils/InstructionUtils.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,6 @@ SILValue swift::getUnderlyingObject(SILValue v) {
6262
}
6363
}
6464

65-
SILValue swift::getUnderlyingObjectStopAtMarkDependence(SILValue v) {
66-
while (true) {
67-
SILValue v2 = stripCastsWithoutMarkDependence(v);
68-
v2 = stripAddressProjections(v2);
69-
v2 = stripIndexingInsts(v2);
70-
v2 = lookThroughOwnershipInsts(v2);
71-
if (v2 == v)
72-
return v2;
73-
v = v2;
74-
}
75-
}
76-
7765
/// Return the underlying SILValue after stripping off identity SILArguments if
7866
/// we belong to a BB with one predecessor.
7967
SILValue swift::stripSinglePredecessorArgs(SILValue V) {

lib/SIL/Utils/SILBridging.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,3 +513,11 @@ BridgedInstruction SILBuilder_createCondFail(BridgedInstruction insertionPoint,
513513
return {builder.createCondFail(getRegularLocation(loc),
514514
castToSILValue(condition), getStringRef(messge))};
515515
}
516+
517+
BridgedInstruction SILBuilder_createIntegerLiteral(BridgedInstruction insertionPoint,
518+
BridgedLocation loc, BridgedType type, SwiftInt value) {
519+
SILBuilder builder(castToInst(insertionPoint), getSILDebugScope(loc));
520+
return {builder.createIntegerLiteral(getRegularLocation(loc),
521+
getSILType(type), value)};
522+
}
523+

lib/SILOptimizer/Analysis/MemoryBehavior.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,17 @@ static SILValue getBeginScopeInst(SILValue V) {
566566
if (BorrowedValue borrowedObj = getSingleBorrowIntroducingValue(object)) {
567567
return borrowedObj.value;
568568
}
569+
if (!object->getFunction()->hasOwnership()) {
570+
// In non-OSSA, do a quick check if the object is a guaranteed function
571+
// argument.
572+
// Note that in OSSA, getSingleBorrowIntroducingValue will detect a
573+
// guaranteed argument.
574+
SILValue root = findOwnershipReferenceAggregate(object);
575+
if (auto *funcArg = dyn_cast<SILFunctionArgument>(root)) {
576+
if (funcArg->getArgumentConvention().isGuaranteedConvention())
577+
return funcArg;
578+
}
579+
}
569580
return SILValue();
570581
}
571582

lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,18 @@ SILInstruction *SILCombiner::optimizeLoadFromStringLiteral(LoadInst *LI) {
804804
return Builder.createIntegerLiteral(LI->getLoc(), LI->getType(), str[index]);
805805
}
806806

807+
static bool isShiftRightByAtLeastOne(SILInstruction *inst) {
808+
auto *bi = dyn_cast<BuiltinInst>(inst);
809+
if (!bi)
810+
return false;
811+
if (bi->getBuiltinInfo().ID != BuiltinValueKind::LShr)
812+
return false;
813+
auto *shiftVal = dyn_cast<IntegerLiteralInst>(bi->getArguments()[1]);
814+
if (!shiftVal)
815+
return false;
816+
return shiftVal->getValue().isStrictlyPositive();
817+
}
818+
807819
/// Returns true if \p LI loads a zero integer from the empty Array, Dictionary
808820
/// or Set singleton.
809821
static bool isZeroLoadFromEmptyCollection(SingleValueInstruction *LI) {
@@ -826,15 +838,23 @@ static bool isZeroLoadFromEmptyCollection(SingleValueInstruction *LI) {
826838
}
827839
case ValueKind::StructElementAddrInst: {
828840
auto *SEA = cast<StructElementAddrInst>(addr);
829-
// For Array, we only support "count". The value of "capacityAndFlags"
830-
// is not defined in the ABI and could change in another version of the
831-
// runtime (the capacity must be 0, but the flags may be not 0).
832-
if (SEA->getStructDecl()->getName().is("_SwiftArrayBodyStorage") &&
833-
!SEA->getField()->getName().is("count")) {
834-
return false;
835-
}
836841
addr = SEA->getOperand();
837-
break;
842+
if (!SEA->getStructDecl()->getName().is("_SwiftArrayBodyStorage"))
843+
break;
844+
if (SEA->getField()->getName().is("count"))
845+
break;
846+
// For Array, the value of `capacityAndFlags` has only a zero capacity
847+
// but not necessarily a zero flag (in fact, the flag is 1).
848+
// Therefore only replace `capacityAndFlags` with zero if the flag is
849+
// masked out by a right-shift of at least 1.
850+
if (SEA->getField()->getName().is("_capacityAndFlags")) {
851+
for (Operand *loadUse : LI->getUses()) {
852+
if (!isShiftRightByAtLeastOne(loadUse->getUser()))
853+
return false;
854+
}
855+
break;
856+
}
857+
return false;
838858
}
839859
case ValueKind::RefElementAddrInst: {
840860
auto *REA = cast<RefElementAddrInst>(addr);
@@ -858,6 +878,13 @@ static bool isZeroLoadFromEmptyCollection(SingleValueInstruction *LI) {
858878
case ValueKind::EndCOWMutationInst:
859879
addr = cast<SingleValueInstruction>(addr)->getOperand(0);
860880
break;
881+
case ValueKind::MultipleValueInstructionResult:
882+
if (auto *bci = dyn_cast<BeginCOWMutationInst>(
883+
addr->getDefiningInstruction())) {
884+
addr = bci->getOperand();
885+
break;
886+
}
887+
return false;
861888
default:
862889
return false;
863890
}

lib/SILOptimizer/Transforms/COWOpts.cpp

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ namespace {
5858
/// The optimization can also handle def-use chains between end_cow_mutation and
5959
/// begin_cow_mutation which involve phi-arguments.
6060
///
61-
/// An additional peephole optimization is performed: if the begin_cow_mutation
62-
/// is the only use of the end_cow_mutation, the whole pair of instructions
63-
/// is eliminated.
64-
///
6561
class COWOptsPass : public SILFunctionTransform {
6662
public:
6763
COWOptsPass() {}
@@ -86,18 +82,19 @@ void COWOptsPass::run() {
8682
if (!F->shouldOptimize())
8783
return;
8884

89-
LLVM_DEBUG(llvm::dbgs() << "*** RedundantPhiElimination on function: "
85+
LLVM_DEBUG(llvm::dbgs() << "*** COW optimization on function: "
9086
<< F->getName() << " ***\n");
9187

9288
AA = PM->getAnalysis<AliasAnalysis>(F);
9389

9490
bool changed = false;
9591
for (SILBasicBlock &block : *F) {
96-
auto iter = block.begin();
97-
while (iter != block.end()) {
98-
SILInstruction *inst = &*iter++;
99-
if (auto *beginCOW = dyn_cast<BeginCOWMutationInst>(inst))
100-
changed |= optimizeBeginCOW(beginCOW);
92+
93+
for (SILInstruction &inst : block) {
94+
if (auto *beginCOW = dyn_cast<BeginCOWMutationInst>(&inst)) {
95+
if (optimizeBeginCOW(beginCOW))
96+
changed = true;
97+
}
10198
}
10299
}
103100

@@ -212,20 +209,6 @@ bool COWOptsPass::optimizeBeginCOW(BeginCOWMutationInst *BCM) {
212209
BCM->getUniquenessResult()->getType(), 1);
213210
BCM->getUniquenessResult()->replaceAllUsesWith(IL);
214211

215-
// Try the peephole optimization: remove an end_cow_mutation/begin_cow_mutation
216-
// pair completely if the begin_cow_mutation is the only use of
217-
// end_cow_mutation.
218-
if (auto *singleEndCOW = dyn_cast<EndCOWMutationInst>(BCM->getOperand())) {
219-
assert(endCOWMutationInsts.size() == 1 &&
220-
*endCOWMutationInsts.begin() == singleEndCOW);
221-
if (singleEndCOW->hasOneUse()) {
222-
BCM->getBufferResult()->replaceAllUsesWith(singleEndCOW->getOperand());
223-
BCM->eraseFromParent();
224-
singleEndCOW->eraseFromParent();
225-
return true;
226-
}
227-
}
228-
229212
for (EndCOWMutationInst *ECM : endCOWMutationInsts) {
230213
// This is important for other optimizations: The code is now relying on
231214
// the buffer to be unique.

lib/SILOptimizer/Transforms/DeadStoreElimination.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,14 +1182,17 @@ void DSEContext::runIterativeDSE() {
11821182
}
11831183

11841184
bool DSEContext::run() {
1185-
std::pair<int, int> LSCount = std::make_pair(0, 0);
1185+
int numLoads = 0, numStores = 0;
1186+
bool immutableLoadsFound = false;
11861187
// Walk over the function and find all the locations accessed by
11871188
// this function.
11881189
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex,
1189-
BaseToLocIndex, TE, LSCount);
1190+
BaseToLocIndex, TE,
1191+
/*stopAtImmutable*/ false,
1192+
numLoads, numStores, immutableLoadsFound);
11901193

11911194
// Check how to optimize this function.
1192-
ProcessKind Kind = getProcessFunctionKind(LSCount.second);
1195+
ProcessKind Kind = getProcessFunctionKind(numStores);
11931196

11941197
// We do not optimize this function at all.
11951198
if (Kind == ProcessKind::ProcessNone)

lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -486,14 +486,21 @@ class RLEContext {
486486
/// If set, RLE ignores loads from that array type.
487487
NominalTypeDecl *ArrayType;
488488

489+
/// Set to true if loads with a `ref_element_addr [immutable]` or
490+
/// `ref_tail_addr [immutable]` base address are found.
491+
bool immutableLoadsFound = false;
492+
493+
/// Only optimize loads with a base address of `ref_element_addr [immutable]`
494+
/// or `ref_tail_addr [immutable]`.
495+
bool onlyImmutableLoads;
496+
489497
#ifndef NDEBUG
490498
SILPrintContext printCtx;
491499
#endif
492500

493501
public:
494-
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
495-
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
496-
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads);
502+
RLEContext(SILFunction *F, SILPassManager *PM,
503+
bool disableArrayLoads, bool onlyImmutableLoads);
497504

498505
RLEContext(const RLEContext &) = delete;
499506
RLEContext(RLEContext &&) = delete;
@@ -504,6 +511,8 @@ class RLEContext {
504511
/// Entry point to redundant load elimination.
505512
bool run();
506513

514+
bool shouldOptimizeImmutableLoads() const { return immutableLoadsFound; }
515+
507516
SILFunction *getFunction() const { return Fn; }
508517

509518
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
@@ -570,6 +579,11 @@ class RLEContext {
570579
LI->getType().getNominalOrBoundGenericNominal() != ArrayType) {
571580
return LI;
572581
}
582+
if (onlyImmutableLoads &&
583+
!LSLocation::getBaseAddressOrObject(LI->getOperand(),
584+
/*stopAtImmutable*/ true).second) {
585+
return nullptr;
586+
}
573587
}
574588
return nullptr;
575589
}
@@ -1200,14 +1214,17 @@ void BlockState::dump(RLEContext &Ctx) {
12001214
// RLEContext Implementation
12011215
//===----------------------------------------------------------------------===//
12021216

1203-
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
1204-
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
1205-
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads)
1206-
: Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI), BBToLocState(F),
1207-
BBWithLoads(F),
1217+
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM,
1218+
bool disableArrayLoads, bool onlyImmutableLoads)
1219+
: Fn(F), PM(PM), AA(PM->getAnalysis<AliasAnalysis>(F)),
1220+
TE(PM->getAnalysis<TypeExpansionAnalysis>()),
1221+
PO(PM->getAnalysis<PostOrderAnalysis>()->get(F)),
1222+
EAFI(PM->getAnalysis<EpilogueARCAnalysis>()->get(F)),
1223+
BBToLocState(F), BBWithLoads(F),
12081224
ArrayType(disableArrayLoads
12091225
? F->getModule().getASTContext().getArrayDecl()
1210-
: nullptr)
1226+
: nullptr),
1227+
onlyImmutableLoads(onlyImmutableLoads)
12111228
#ifndef NDEBUG
12121229
,
12131230
printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true)
@@ -1567,14 +1584,15 @@ bool RLEContext::run() {
15671584
// Phase 4. we perform the redundant load elimination.
15681585
// Walk over the function and find all the locations accessed by
15691586
// this function.
1570-
std::pair<int, int> LSCount = std::make_pair(0, 0);
1587+
int numLoads = 0, numStores = 0;
15711588
LSLocation::enumerateLSLocations(*Fn, LocationVault,
15721589
LocToBitIndex,
15731590
BaseToLocIndex, TE,
1574-
LSCount);
1591+
/*stopAtImmutable*/ onlyImmutableLoads,
1592+
numLoads, numStores, immutableLoadsFound);
15751593

15761594
// Check how to optimize this function.
1577-
ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);
1595+
ProcessKind Kind = getProcessFunctionKind(numLoads, numStores);
15781596

15791597
// We do not optimize this function at all.
15801598
if (Kind == ProcessKind::ProcessNone)
@@ -1681,15 +1699,21 @@ class RedundantLoadElimination : public SILFunctionTransform {
16811699
LLVM_DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName()
16821700
<< " ***\n");
16831701

1684-
auto *AA = PM->getAnalysis<AliasAnalysis>(F);
1685-
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
1686-
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
1687-
auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F);
1688-
1689-
RLEContext RLE(F, PM, AA, TE, PO, EAFI, disableArrayLoads);
1702+
RLEContext RLE(F, PM, disableArrayLoads,
1703+
/*onlyImmutableLoads*/ false);
16901704
if (RLE.run()) {
16911705
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
16921706
}
1707+
if (RLE.shouldOptimizeImmutableLoads()) {
1708+
/// Re-running RLE while cutting base addresses off at
1709+
/// `ref_element_addr [immutable]` or `ref_tail_addr [immutable]` can
1710+
/// expose additional opportunities.
1711+
RLEContext RLE2(F, PM, disableArrayLoads,
1712+
/*onlyImmutableLoads*/ true);
1713+
if (RLE2.run()) {
1714+
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
1715+
}
1716+
}
16931717
}
16941718
};
16951719

0 commit comments

Comments
 (0)