Skip to content

Commit f97876c

Browse files
committed
RLE: better handling of ref_element/tail_addr [immutable]
Rerun RLE with cutting off the base address of loads at `ref_element/tail_addr [immutable]`. This increases the chance of catching loads of immutable COW class properties or elements.
1 parent b89f58d commit f97876c

File tree

7 files changed

+131
-49
lines changed

7 files changed

+131
-49
lines changed

include/swift/SIL/InstructionUtils.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@ namespace swift {
2626
/// nothing left to strip.
2727
SILValue getUnderlyingObject(SILValue V);
2828

29-
SILValue getUnderlyingObjectStopAtMarkDependence(SILValue V);
30-
3129
SILValue stripSinglePredecessorArgs(SILValue V);
3230

3331
/// Return the underlying SILValue after stripping off all casts from the

include/swift/SILOptimizer/Utils/LoadStoreOptUtils.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,21 +379,37 @@ class LSLocation : public LSBase {
379379
static void reduce(LSLocation Base, SILModule *Mod,
380380
TypeExpansionContext context, LSLocationList &Locs);
381381

382+
/// Gets the base address for `v`.
383+
/// If `stopAtImmutable` is true, the base address is only calculated up to
384+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
385+
/// Return the base address and true if such an immutable class projection
386+
/// is found.
387+
static std::pair<SILValue, bool>
388+
getBaseAddressOrObject(SILValue v, bool stopAtImmutable);
389+
382390
/// Enumerate the given Mem LSLocation.
383-
static void enumerateLSLocation(TypeExpansionContext context, SILModule *M,
391+
/// If `stopAtImmutable` is true, the base address is only calculated up to
392+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
393+
/// Returns true if it's an immutable location.
394+
static bool enumerateLSLocation(TypeExpansionContext context, SILModule *M,
384395
SILValue Mem,
385396
std::vector<LSLocation> &LSLocationVault,
386397
LSLocationIndexMap &LocToBit,
387398
LSLocationBaseMap &BaseToLoc,
388-
TypeExpansionAnalysis *TE);
399+
TypeExpansionAnalysis *TE,
400+
bool stopAtImmutable);
389401

390402
/// Enumerate all the locations in the function.
403+
/// If `stopAtImmutable` is true, the base addresses are only calculated up to
404+
/// a `ref_element_addr [immutable]` or a `ref_tail_addr [immutable]`.
391405
static void enumerateLSLocations(SILFunction &F,
392406
std::vector<LSLocation> &LSLocationVault,
393407
LSLocationIndexMap &LocToBit,
394408
LSLocationBaseMap &BaseToLoc,
395409
TypeExpansionAnalysis *TE,
396-
std::pair<int, int> &LSCount);
410+
bool stopAtImmutable,
411+
int &numLoads, int &numStores,
412+
bool &immutableLoadsFound);
397413
};
398414

399415
static inline llvm::hash_code hash_value(const LSLocation &L) {

lib/SIL/Utils/InstructionUtils.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,6 @@ SILValue swift::getUnderlyingObject(SILValue v) {
6262
}
6363
}
6464

65-
SILValue swift::getUnderlyingObjectStopAtMarkDependence(SILValue v) {
66-
while (true) {
67-
SILValue v2 = stripCastsWithoutMarkDependence(v);
68-
v2 = stripAddressProjections(v2);
69-
v2 = stripIndexingInsts(v2);
70-
v2 = lookThroughOwnershipInsts(v2);
71-
if (v2 == v)
72-
return v2;
73-
v = v2;
74-
}
75-
}
76-
7765
/// Return the underlying SILValue after stripping off identity SILArguments if
7866
/// we belong to a BB with one predecessor.
7967
SILValue swift::stripSinglePredecessorArgs(SILValue V) {

lib/SILOptimizer/Transforms/DeadStoreElimination.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,14 +1182,17 @@ void DSEContext::runIterativeDSE() {
11821182
}
11831183

11841184
bool DSEContext::run() {
1185-
std::pair<int, int> LSCount = std::make_pair(0, 0);
1185+
int numLoads = 0, numStores = 0;
1186+
bool immutableLoadsFound = false;
11861187
// Walk over the function and find all the locations accessed by
11871188
// this function.
11881189
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex,
1189-
BaseToLocIndex, TE, LSCount);
1190+
BaseToLocIndex, TE,
1191+
/*stopAtImmutable*/ false,
1192+
numLoads, numStores, immutableLoadsFound);
11901193

11911194
// Check how to optimize this function.
1192-
ProcessKind Kind = getProcessFunctionKind(LSCount.second);
1195+
ProcessKind Kind = getProcessFunctionKind(numStores);
11931196

11941197
// We do not optimize this function at all.
11951198
if (Kind == ProcessKind::ProcessNone)

lib/SILOptimizer/Transforms/RedundantLoadElimination.cpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -486,14 +486,21 @@ class RLEContext {
486486
/// If set, RLE ignores loads from that array type.
487487
NominalTypeDecl *ArrayType;
488488

489+
/// Set to true if loads with a `ref_element_addr [immutable]` or
490+
/// `ref_tail_addr [immutable]` base address are found.
491+
bool immutableLoadsFound = false;
492+
493+
/// Only optimize loads with a base address of `ref_element_addr [immutable]`
494+
/// or `ref_tail_addr [immutable]`.
495+
bool onlyImmutableLoads;
496+
489497
#ifndef NDEBUG
490498
SILPrintContext printCtx;
491499
#endif
492500

493501
public:
494-
RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
495-
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
496-
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads);
502+
RLEContext(SILFunction *F, SILPassManager *PM,
503+
bool disableArrayLoads, bool onlyImmutableLoads);
497504

498505
RLEContext(const RLEContext &) = delete;
499506
RLEContext(RLEContext &&) = delete;
@@ -504,6 +511,8 @@ class RLEContext {
504511
/// Entry point to redundant load elimination.
505512
bool run();
506513

514+
bool shouldOptimizeImmutableLoads() const { return immutableLoadsFound; }
515+
507516
SILFunction *getFunction() const { return Fn; }
508517

509518
/// Use a set of ad hoc rules to tell whether we should run a pessimistic
@@ -570,6 +579,11 @@ class RLEContext {
570579
LI->getType().getNominalOrBoundGenericNominal() != ArrayType) {
571580
return LI;
572581
}
582+
if (onlyImmutableLoads &&
583+
!LSLocation::getBaseAddressOrObject(LI->getOperand(),
584+
/*stopAtImmutable*/ true).second) {
585+
return nullptr;
586+
}
573587
}
574588
return nullptr;
575589
}
@@ -1200,14 +1214,17 @@ void BlockState::dump(RLEContext &Ctx) {
12001214
// RLEContext Implementation
12011215
//===----------------------------------------------------------------------===//
12021216

1203-
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM, AliasAnalysis *AA,
1204-
TypeExpansionAnalysis *TE, PostOrderFunctionInfo *PO,
1205-
EpilogueARCFunctionInfo *EAFI, bool disableArrayLoads)
1206-
: Fn(F), PM(PM), AA(AA), TE(TE), PO(PO), EAFI(EAFI), BBToLocState(F),
1207-
BBWithLoads(F),
1217+
RLEContext::RLEContext(SILFunction *F, SILPassManager *PM,
1218+
bool disableArrayLoads, bool onlyImmutableLoads)
1219+
: Fn(F), PM(PM), AA(PM->getAnalysis<AliasAnalysis>(F)),
1220+
TE(PM->getAnalysis<TypeExpansionAnalysis>()),
1221+
PO(PM->getAnalysis<PostOrderAnalysis>()->get(F)),
1222+
EAFI(PM->getAnalysis<EpilogueARCAnalysis>()->get(F)),
1223+
BBToLocState(F), BBWithLoads(F),
12081224
ArrayType(disableArrayLoads
12091225
? F->getModule().getASTContext().getArrayDecl()
1210-
: nullptr)
1226+
: nullptr),
1227+
onlyImmutableLoads(onlyImmutableLoads)
12111228
#ifndef NDEBUG
12121229
,
12131230
printCtx(llvm::dbgs(), /*Verbose=*/false, /*Sorted=*/true)
@@ -1567,14 +1584,15 @@ bool RLEContext::run() {
15671584
// Phase 4. we perform the redundant load elimination.
15681585
// Walk over the function and find all the locations accessed by
15691586
// this function.
1570-
std::pair<int, int> LSCount = std::make_pair(0, 0);
1587+
int numLoads = 0, numStores = 0;
15711588
LSLocation::enumerateLSLocations(*Fn, LocationVault,
15721589
LocToBitIndex,
15731590
BaseToLocIndex, TE,
1574-
LSCount);
1591+
/*stopAtImmutable*/ onlyImmutableLoads,
1592+
numLoads, numStores, immutableLoadsFound);
15751593

15761594
// Check how to optimize this function.
1577-
ProcessKind Kind = getProcessFunctionKind(LSCount.first, LSCount.second);
1595+
ProcessKind Kind = getProcessFunctionKind(numLoads, numStores);
15781596

15791597
// We do not optimize this function at all.
15801598
if (Kind == ProcessKind::ProcessNone)
@@ -1681,15 +1699,21 @@ class RedundantLoadElimination : public SILFunctionTransform {
16811699
LLVM_DEBUG(llvm::dbgs() << "*** RLE on function: " << F->getName()
16821700
<< " ***\n");
16831701

1684-
auto *AA = PM->getAnalysis<AliasAnalysis>(F);
1685-
auto *TE = PM->getAnalysis<TypeExpansionAnalysis>();
1686-
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
1687-
auto *EAFI = PM->getAnalysis<EpilogueARCAnalysis>()->get(F);
1688-
1689-
RLEContext RLE(F, PM, AA, TE, PO, EAFI, disableArrayLoads);
1702+
RLEContext RLE(F, PM, disableArrayLoads,
1703+
/*onlyImmutableLoads*/ false);
16901704
if (RLE.run()) {
16911705
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
16921706
}
1707+
if (RLE.shouldOptimizeImmutableLoads()) {
1708+
/// Re-running RLE with cutting base addresses off at
1709+
/// `ref_element_addr [immutable]` or `ref_tail_addr [immutable]` can
1710+
/// expose additional opportunities.
1711+
RLEContext RLE2(F, PM, disableArrayLoads,
1712+
/*onlyImmutableLoads*/ true);
1713+
if (RLE2.run()) {
1714+
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
1715+
}
1716+
}
16931717
}
16941718
};
16951719

lib/SILOptimizer/Utils/LoadStoreOptUtils.cpp

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -245,28 +245,60 @@ void LSLocation::reduce(LSLocation Base, SILModule *M,
245245
replaceSubLocations(Base, M, context, Locs, SubLocations);
246246
}
247247

248-
void LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
248+
std::pair<SILValue, bool>
249+
LSLocation::getBaseAddressOrObject(SILValue v, bool stopAtImmutable) {
250+
bool isImmutable = false;
251+
while (true) {
252+
if (auto *rea = dyn_cast<RefElementAddrInst>(v)) {
253+
if (rea->isImmutable()) {
254+
isImmutable = true;
255+
if (stopAtImmutable)
256+
return {v, true};
257+
}
258+
}
259+
if (auto *rta = dyn_cast<RefTailAddrInst>(v)) {
260+
if (rta->isImmutable()) {
261+
isImmutable = true;
262+
if (stopAtImmutable)
263+
return {v, true};
264+
}
265+
}
266+
SILValue v2 = stripCastsWithoutMarkDependence(v);
267+
v2 = stripSinglePredecessorArgs(v2);
268+
if (Projection::isAddressProjection(v2))
269+
v2 = cast<SingleValueInstruction>(v2)->getOperand(0);
270+
v2 = stripIndexingInsts(v2);
271+
v2 = lookThroughOwnershipInsts(v2);
272+
if (v2 == v)
273+
return {v2, isImmutable};
274+
v = v2;
275+
}
276+
}
277+
278+
bool LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
249279
SILValue Mem,
250280
std::vector<LSLocation> &Locations,
251281
LSLocationIndexMap &IndexMap,
252282
LSLocationBaseMap &BaseMap,
253-
TypeExpansionAnalysis *TypeCache) {
283+
TypeExpansionAnalysis *TypeCache,
284+
bool stopAtImmutable) {
254285
// We have processed this SILValue before.
255286
if (BaseMap.find(Mem) != BaseMap.end())
256-
return;
287+
return false;
257288

258289
// Construct a Location to represent the memory written by this instruction.
259290
// ProjectionPath currently does not handle mark_dependence so stop our
260291
// underlying object search at these instructions.
261292
// We still get a benefit if we cse mark_dependence instructions and then
262293
// merge loads from them.
263-
SILValue UO = getUnderlyingObjectStopAtMarkDependence(Mem);
294+
auto baseAndImmutable = getBaseAddressOrObject(Mem, stopAtImmutable);
295+
SILValue UO = baseAndImmutable.first;
264296
LSLocation L(UO, ProjectionPath::getProjectionPath(UO, Mem));
265297

266298
// If we can't figure out the Base or Projection Path for the memory location,
267299
// simply ignore it for now.
268300
if (!L.isValid())
269-
return;
301+
return false;
270302

271303
// Record the SILValue to location mapping.
272304
BaseMap[Mem] = L;
@@ -281,6 +313,7 @@ void LSLocation::enumerateLSLocation(TypeExpansionContext context, SILModule *M,
281313
IndexMap[Loc] = Locations.size();
282314
Locations.push_back(Loc);
283315
}
316+
return baseAndImmutable.second; // the "is immutable" flag, not the base value
284317
}
285318

286319
void
@@ -289,22 +322,26 @@ LSLocation::enumerateLSLocations(SILFunction &F,
289322
LSLocationIndexMap &IndexMap,
290323
LSLocationBaseMap &BaseMap,
291324
TypeExpansionAnalysis *TypeCache,
292-
std::pair<int, int> &LSCount) {
325+
bool stopAtImmutable,
326+
int &numLoads, int &numStores,
327+
bool &immutableLoadsFound) {
293328
// Enumerate all locations accessed by the loads or stores.
294329
for (auto &B : F) {
295330
for (auto &I : B) {
296331
if (auto *LI = dyn_cast<LoadInst>(&I)) {
297-
enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
332+
if (enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
298333
LI->getOperand(), Locations, IndexMap, BaseMap,
299-
TypeCache);
300-
++LSCount.first;
334+
TypeCache, stopAtImmutable)) {
335+
immutableLoadsFound = true;
336+
}
337+
++numLoads;
301338
continue;
302339
}
303340
if (auto *SI = dyn_cast<StoreInst>(&I)) {
304341
enumerateLSLocation(F.getTypeExpansionContext(), &I.getModule(),
305342
SI->getDest(), Locations, IndexMap, BaseMap,
306-
TypeCache);
307-
++LSCount.second;
343+
TypeCache, stopAtImmutable);
344+
++numStores;
308345
continue;
309346
}
310347
}

test/SILOptimizer/redundant_load_elim_ossa.sil

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,22 @@ bb0(%0 : @owned $AB):
163163
return %5 : $Int
164164
}
165165

166+
// CHECK-LABEL: sil [ossa] @forward_load_of_immutable_class_property
167+
// CHECK: [[L:%[0-9]+]] = load
168+
// CHECK: apply %{{[0-9]+}}([[L]])
169+
// CHECK-NOT: load
170+
// CHECK: return [[L]]
171+
// CHECK-LABEL: } // end sil function 'forward_load_of_immutable_class_property'
172+
sil [ossa] @forward_load_of_immutable_class_property : $@convention(thin) (@guaranteed AB) -> Int {
173+
bb0(%0 : @guaranteed $AB):
174+
%1 = ref_element_addr [immutable] %0 : $AB, #AB.value
175+
%2 = load [trivial] %1 : $*Int
176+
%3 = function_ref @use_Int : $@convention(thin) (Int) -> ()
177+
apply %3(%2) : $@convention(thin) (Int) -> ()
178+
%5 = load [trivial] %1 : $*Int
179+
return %5 : $Int
180+
}
181+
166182
// CHECK-LABEL: sil hidden [ossa] @load_forward_across_end_cow_mutation :
167183
// CHECK-NOT: = load
168184
// CHECK-LABEL: } // end sil function 'load_forward_across_end_cow_mutation'

0 commit comments

Comments
 (0)