Skip to content

Commit 868156c

Browse files
authored
Merge pull request swiftlang#24773 from atrick/fix-accessopt-slow
Add AccessEnforcementOpts fast paths.
2 parents 9647483 + 1120617 commit 868156c

File tree

4 files changed

+205
-27
lines changed

4 files changed

+205
-27
lines changed

include/swift/SIL/MemAccessUtils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ class AccessedStorage {
143143
SWIFT_INLINE_BITFIELD_FULL(AccessEnforcementOptsInfo, AccessedStorage,
144144
64 - NumAccessedStorageBits,
145145
seenNestedConflict : 1,
146-
beginAccessIndex : 63 - NumAccessedStorageBits);
146+
seenIdenticalStorage : 1,
147+
beginAccessIndex : 62 - NumAccessedStorageBits);
147148

148149
// Define data flow bits for use in the AccessEnforcementDom pass. Each
149150
// begin_access in the function is mapped to one instance of this subclass.

lib/SILOptimizer/Analysis/AccessedStorageAnalysis.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ bool AccessedStorageResult::mergeAccesses(
129129
// can occur ~1000 elements. 200 is large enough to cover "normal" code,
130130
// while ensuring compile time isn't affected.
131131
if (storageAccessSet.size() > 200) {
132-
llvm::dbgs() << "BIG SET " << storageAccessSet.size() << "\n";
133132
setWorstEffects();
134133
return true;
135134
}

lib/SILOptimizer/Transforms/AccessEnforcementOpts.cpp

Lines changed: 92 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,23 @@ class AccessEnforcementOptsInfo : public AccessedStorage {
129129
Bits.AccessEnforcementOptsInfo.seenNestedConflict = 1;
130130
}
131131

132+
/// Did a PostOrder walk previously find another access to the same
133+
/// storage. If so, then this access could be merged with a subsequent access
134+
/// after checking for conflicts.
135+
bool seenIdenticalStorage() const {
136+
return Bits.AccessEnforcementOptsInfo.seenIdenticalStorage;
137+
}
138+
139+
void setSeenIdenticalStorage() {
140+
Bits.AccessEnforcementOptsInfo.seenIdenticalStorage = 1;
141+
}
142+
132143
void dump() const {
133144
AccessedStorage::dump();
134145
llvm::dbgs() << " access index: " << getAccessIndex() << " <"
135-
<< (seenNestedConflict() ? "" : "no ") << "conflict>\n";
146+
<< (seenNestedConflict() ? "" : "no ") << "conflict> <"
147+
<< (seenIdenticalStorage() ? "" : "not ") << "seen identical>"
148+
<< "\n";
136149
}
137150
};
138151
using AccessInfo = AccessEnforcementOptsInfo;
@@ -267,21 +280,26 @@ class AccessConflictAndMergeAnalysis {
267280

268281
private:
269282
LoopRegionFunctionInfo *LRFI;
283+
PostOrderFunctionInfo *PO;
270284
AccessedStorageAnalysis *ASA;
271285

286+
// Unique storage locations seen in this function.
287+
AccessedStorageSet storageSet;
288+
272289
Result result;
273290

274291
public:
275292
AccessConflictAndMergeAnalysis(LoopRegionFunctionInfo *LRFI,
293+
PostOrderFunctionInfo *PO,
276294
AccessedStorageAnalysis *ASA)
277-
: LRFI(LRFI), ASA(ASA) {}
295+
: LRFI(LRFI), PO(PO), ASA(ASA) {}
278296

279-
void analyze();
297+
bool analyze();
280298

281299
const Result &getResult() { return result; }
282300

283301
protected:
284-
void identifyBeginAccesses();
302+
bool identifyBeginAccesses();
285303

286304
void
287305
propagateAccessSetsBottomUp(LoopRegionToAccessedStorage &regionToStorageMap,
@@ -423,10 +441,12 @@ BeginAccessInst *AccessConflictAndMergeAnalysis::findMergeableOutOfScopeAccess(
423441
// Given a mergeableAccess, 'A', another out-of-scope access, 'B', and the
424442
// current access, 'C' which has identical storage as 'A', the only situation
425443
// in which it is illegal to merge 'A' with 'C' is when 'B' has non-distinct
426-
// storage from 'A'/'C' and 'B' begins after 'A' and ends before 'C'. This
427-
// would introduce a false conflict. Since it is impossible to determine here
428-
// whether 'A' and 'B' overlap, we assume they do not and avoid merging. The
429-
// case in which they actually do overlap is an unimportant to optimize.
444+
// storage from 'A'/'C', 'B' begins after 'A', and 'B' ends before
445+
// 'C'. Merging 'A' with 'C' would then introduce a false conflict. Since it
446+
// is impossible to determine here whether 'A' and 'B' overlap, we assume they
447+
// do not and simply avoid merging whenever 'B' and 'C' overlap. It is not
448+
// important to optimize the case in which 'A' and 'B' overlap because
449+
// potential conflicts like that are unlikely.
430450
if (llvm::any_of(state.outOfScopeConflictFreeAccesses,
431451
[&](BeginAccessInst *bai) {
432452
auto storageInfo = result.getAccessInfo(bai);
@@ -454,6 +474,11 @@ void AccessConflictAndMergeAnalysis::insertOutOfScopeAccess(
454474
RegionState &state, BeginAccessInst *beginAccess,
455475
AccessInfo &currStorageInfo) {
456476

477+
if (!currStorageInfo.seenIdenticalStorage()) {
478+
LLVM_DEBUG(llvm::dbgs() << "Ignoring unmergeable access: " << *beginAccess);
479+
return;
480+
}
481+
457482
auto identicalStorageIter = llvm::find_if(
458483
state.outOfScopeConflictFreeAccesses, [&](BeginAccessInst *bai) {
459484
auto storageInfo = result.getAccessInfo(bai);
@@ -468,8 +493,13 @@ void AccessConflictAndMergeAnalysis::insertOutOfScopeAccess(
468493
}
469494

470495
// Top-level driver for AccessConflictAndMergeAnalysis
471-
void AccessConflictAndMergeAnalysis::analyze() {
472-
identifyBeginAccesses();
496+
//
497+
// Returns true if the analysis succeeded.
498+
bool AccessConflictAndMergeAnalysis::analyze() {
499+
if (!identifyBeginAccesses()) {
500+
LLVM_DEBUG(llvm::dbgs() << "Skipping AccessConflictAndMergeAnalysis...\n");
501+
return false;
502+
}
473503
LoopRegionToAccessedStorage accessSetsOfRegions;
474504
// Populate a worklist of regions such that the top of the worklist is the
475505
// innermost loop and the bottom of the worklist is the entry block.
@@ -499,6 +529,7 @@ void AccessConflictAndMergeAnalysis::analyze() {
499529
}
500530
}
501531
}
532+
return true;
502533
}
503534

504535
// Find all begin access operations in this function. Map each access to
@@ -507,29 +538,52 @@ void AccessConflictAndMergeAnalysis::analyze() {
507538
//
508539
// Also, add the storage location to the function's RegionStorage
509540
//
541+
// Returns true if it is worthwhile to continue the analysis.
542+
//
510543
// TODO: begin_unpaired_access is not tracked. Even though begin_unpaired_access
511544
// isn't explicitly paired, it may be possible after devirtualization and
512545
// inlining to find all uses of the scratch buffer. However, this doesn't
513546
// currently happen in practice (rdar://40033735).
514-
void AccessConflictAndMergeAnalysis::identifyBeginAccesses() {
515-
for (auto &BB : *LRFI->getFunction()) {
516-
for (auto &I : BB) {
547+
bool AccessConflictAndMergeAnalysis::identifyBeginAccesses() {
548+
bool seenPossibleNestedConflict = false;
549+
bool seenIdenticalStorage = false;
550+
// Scan blocks in PostOrder (bottom-up) to mark any accesses with identical
551+
// storage to another reachable access. The earlier access must be marked
552+
// because this analysis does forward data flow to find conflicts.
553+
for (auto *BB : PO->getPostOrder()) {
554+
for (auto &I : llvm::reverse(*BB)) {
517555
auto *beginAccess = dyn_cast<BeginAccessInst>(&I);
518556
if (!beginAccess)
519557
continue;
520558

521559
if (beginAccess->getEnforcement() != SILAccessEnforcement::Dynamic)
522560
continue;
523561

562+
if (!beginAccess->hasNoNestedConflict())
563+
seenPossibleNestedConflict = true;
564+
524565
// The accessed base is expected to be valid for begin_access, but for
525566
// now, since this optimization runs at the end of the pipeline, we
526567
// gracefully ignore unrecognized source address patterns, which show up
527568
// here as an invalid `storage` value.
528-
const AccessedStorage &storage =
569+
AccessedStorage storage =
529570
findAccessedStorageNonNested(beginAccess->getSource());
530571

531-
auto iterAndSuccess = result.accessMap.try_emplace(
532-
beginAccess, static_cast<const AccessInfo &>(storage));
572+
auto iterAndInserted = storageSet.insert(storage);
573+
574+
// After inserting it in storageSet, this storage object can be downcast
575+
// to AccessInfo to use the pass-specific bits.
576+
auto &accessInfo = static_cast<AccessInfo &>(storage);
577+
578+
// If the same location was seen later in the CFG, mark this access as one
579+
// to check for merging.
580+
if (!iterAndInserted.second) {
581+
seenIdenticalStorage = true;
582+
accessInfo.setSeenIdenticalStorage();
583+
}
584+
585+
auto iterAndSuccess =
586+
result.accessMap.try_emplace(beginAccess, accessInfo);
533587
(void)iterAndSuccess;
534588
assert(iterAndSuccess.second);
535589

@@ -539,6 +593,7 @@ void AccessConflictAndMergeAnalysis::identifyBeginAccesses() {
539593
assert(!info.seenNestedConflict());
540594
}
541595
}
596+
return seenPossibleNestedConflict || seenIdenticalStorage;
542597
}
543598

544599
// Returns a mapping from each loop sub-region to all its access storage
@@ -617,11 +672,14 @@ void AccessConflictAndMergeAnalysis::visitBeginAccess(
617672
recordInScopeConflicts(state, beginAccessInfo, beginAccess->getAccessKind());
618673
// Remove in-scope conflicts to avoid checking them again.
619674
removeConflicts(state.inScopeConflictFreeAccesses, beginAccessInfo);
620-
// Always record the current access as in-scope. It can potentially be folded
621-
// to [no_nested_conflict] independent of any enclosing access conflicts.
622-
bool inserted = state.inScopeConflictFreeAccesses.insert(beginAccess);
623-
(void)inserted;
624-
assert(inserted && "the begin_access should not have been seen yet.");
675+
676+
if (!beginAccess->hasNoNestedConflict()) {
677+
// Record the current access as in-scope. It can potentially be folded to
678+
// [no_nested_conflict] independent of any enclosing access conflicts.
679+
bool inserted = state.inScopeConflictFreeAccesses.insert(beginAccess);
680+
(void)inserted;
681+
assert(inserted && "the begin_access should not have been seen yet.");
682+
}
625683

626684
// Find an out-of-scope access that is mergeable with this access. This is
627685
// done at the BeginAccess because it doesn't matter whether the merged access
@@ -635,7 +693,7 @@ void AccessConflictAndMergeAnalysis::visitBeginAccess(
635693
if (BeginAccessInst *mergeableAccess =
636694
findMergeableOutOfScopeAccess(state, beginAccess)) {
637695
LLVM_DEBUG(llvm::dbgs() << "Found mergable pair: " << *mergeableAccess
638-
<< ", " << *beginAccess << "\n");
696+
<< " with " << *beginAccess << "\n");
639697
result.mergePairs.emplace_back(mergeableAccess, beginAccess);
640698
}
641699
// For the purpose of data-flow, removing the out-of-scope access does not
@@ -947,6 +1005,12 @@ canMerge(PostDominanceInfo *postDomTree,
9471005
static bool mergeAccesses(
9481006
SILFunction *F, PostDominanceInfo *postDomTree,
9491007
const AccessConflictAndMergeAnalysis::MergeablePairs &mergePairs) {
1008+
1009+
if (mergePairs.empty()) {
1010+
LLVM_DEBUG(llvm::dbgs() << "Skipping SCC Analysis...\n");
1011+
return false;
1012+
}
1013+
9501014
bool changed = false;
9511015

9521016
// Compute a map from each block to its SCC -
@@ -993,7 +1057,7 @@ static bool mergeAccesses(
9931057
continue;
9941058

9951059
LLVM_DEBUG(llvm::dbgs()
996-
<< "Merging: " << *childIns << " into " << *parentIns << "\n");
1060+
<< "Merging " << *childIns << " into " << *parentIns << "\n");
9971061

9981062
// Change the no nested conflict of parent if the child has a nested
9991063
// conflict.
@@ -1038,9 +1102,12 @@ struct AccessEnforcementOpts : public SILFunctionTransform {
10381102
<< F->getName() << "\n");
10391103

10401104
LoopRegionFunctionInfo *LRFI = getAnalysis<LoopRegionAnalysis>()->get(F);
1105+
PostOrderFunctionInfo *PO = getAnalysis<PostOrderAnalysis>()->get(F);
10411106
AccessedStorageAnalysis *ASA = getAnalysis<AccessedStorageAnalysis>();
1042-
AccessConflictAndMergeAnalysis a(LRFI, ASA);
1043-
a.analyze();
1107+
AccessConflictAndMergeAnalysis a(LRFI, PO, ASA);
1108+
if (!a.analyze())
1109+
return;
1110+
10441111
auto result = a.getResult();
10451112

10461113
// Perform access folding by setting the [no_nested_conflict] flag on
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// RUN: %target-sil-opt -access-enforcement-opts -debug-only=access-enforcement-opts -verify %s -o /dev/null 2>&1 | %FileCheck %s
2+
//
3+
// REQUIRES: asserts
4+
//
5+
// This tests four kinds of AccessEnforcementOpts fast paths:
6+
//
7+
// 1. During identifyAccess, determine if there are either any
8+
// identical accesses or an accesses that aren't already marked
9+
// no_nested_storage. If there are neither, then skip the subsequent
10+
// data flow analysis.
11+
//
12+
// 2. In StorageSet, indicate whether identical storage was seen
13+
// elsewhere in the function. During dataflow, only add an access to
14+
// the out-of-scope access set if was marked as having identical
15+
// storage with another access.
16+
//
17+
// 3. If no access pairs can be merged, avoid analyzing SCC regions in
18+
// the control flow graph.
19+
//
20+
// 4. During data flow, don't track in scope conflicts for
21+
// instructions already marked [no_nested_conflict].
22+
23+
sil_stage canonical
24+
25+
import Builtin
26+
import Swift
27+
import SwiftShims
28+
29+
public class TestEarlyBail {
30+
@_hasStorage var prop1: Int { get set }
31+
@_hasStorage var prop2: Int { get set }
32+
}
33+
34+
// All accesses are no_nested_conflict, and none are identical. Bail.
35+
// (1) Skipping AccessConflictAndMergeAnalysis...
36+
//
37+
// CHECK-LABEL: Running local AccessEnforcementOpts on testEarlyBail
38+
// CHECK-NOT: Processing
39+
// CHECK-NOT: No conflict on one path
40+
// CHECK-NOT: Folding
41+
// CHECK-NOT: Merging
42+
// CHECK: Skipping AccessConflictAndMergeAnalysis...
43+
sil @testEarlyBail : $@convention(method) (@owned TestEarlyBail) -> @owned TestEarlyBail {
44+
bb0(%0 : $TestEarlyBail):
45+
%adr1 = ref_element_addr %0 : $TestEarlyBail, #TestEarlyBail.prop1
46+
%a1 = begin_access [modify] [dynamic] [no_nested_conflict] %adr1 : $*Int
47+
%v1 = load %a1 : $*Int
48+
end_access %a1 : $*Int
49+
%adr2 = ref_element_addr %0 : $TestEarlyBail, #TestEarlyBail.prop2
50+
%a2 = begin_access [modify] [dynamic] [no_nested_conflict] %adr2 : $*Int
51+
%v2 = load %a2 : $*Int
52+
end_access %a2 : $*Int
53+
return %0 : $TestEarlyBail
54+
}
55+
56+
// An access is not marked no_nested_conflict. Need to run analysis.
57+
// NOT (1) Skipping AccessConflictAndMergeAnalysis...
58+
// (2) Ignoring unique access...
59+
// (3) Skipping SCC analysis...
60+
//
61+
// CHECK-LABEL: Running local AccessEnforcementOpts on testEarlyBailNeedsFolding
62+
// CHECK: Processing Function: testEarlyBailNeedsFolding
63+
// CHECK: No conflict on one path from [[ACCESS:%.*]] = begin_access [modify] [dynamic] %{{.*}} : $*Int
64+
// CHECK-NEXT: to end_access [[ACCESS]] : $*Int
65+
// CHECK: Ignoring unmergeable access: [[ACCESS]] = begin_access [modify] [dynamic] %{{.*}} : $*Int
66+
// CHECK: Folding [[ACCESS]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
67+
// CHECK: Skipping SCC Analysis...
68+
sil @testEarlyBailNeedsFolding : $@convention(method) (@owned TestEarlyBail) -> @owned TestEarlyBail {
69+
bb0(%0 : $TestEarlyBail):
70+
%adr1 = ref_element_addr %0 : $TestEarlyBail, #TestEarlyBail.prop1
71+
%a1 = begin_access [modify] [dynamic] %adr1 : $*Int
72+
%v1 = load %a1 : $*Int
73+
end_access %a1 : $*Int
74+
return %0 : $TestEarlyBail
75+
}
76+
77+
// Two accesses have the same storage. Need to run analysis.
78+
// NOT (1) Skipping AccessConflictAndMergeAnalysis...
79+
// NOT (2) Ignoring unique access...
80+
// (4) No Conflict On Path
81+
// (4) NOT No Conflict On Path
82+
// NOT (3) Skipping SCC analysis...
83+
//
84+
// CHECK-LABEL: Running local AccessEnforcementOpts on testEarlyBailMayMerge
85+
// CHECK: Processing Function: testEarlyBailMayMerge
86+
// CHECK: No conflict on one path from [[ACCESS1:%.*]] = begin_access [modify] [dynamic] %{{.*}} : $*Int
87+
// CHECK: to end_access [[ACCESS1]] : $*Int
88+
// CHECK: Found mergable pair: [[ACCESS1]] = begin_access [modify] [dynamic] %{{.*}} : $*Int
89+
// CHECK-NEXT: with [[ACCESS2:%.*]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
90+
// CHECK-NOT: No conflict on one path from [[ACCESS2]]
91+
// CHECK: Ignoring unmergeable access: [[ACCESS2]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
92+
// CHECK-NOT: No conflict on one path from [[ACCESS2]]
93+
//
94+
// Note: The order that accesses with no nested conflicts are "Folded" is nondeterministic.
95+
// CHECK-DAG: Folding [[ACCESS1]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
96+
// CHECK-DAG: Folding [[ACCESS2]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
97+
// CHECK: Merging [[ACCESS2]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
98+
// CHECK-NEXT: into [[ACCESS1]] = begin_access [modify] [dynamic] [no_nested_conflict] %{{.*}} : $*Int
99+
// CHECK-NOT: Skipping SCC Analysis...
100+
sil @testEarlyBailMayMerge : $@convention(method) (@owned TestEarlyBail) -> @owned TestEarlyBail {
101+
bb0(%0 : $TestEarlyBail):
102+
%adr1 = ref_element_addr %0 : $TestEarlyBail, #TestEarlyBail.prop1
103+
%a1 = begin_access [modify] [dynamic] %adr1 : $*Int
104+
%v1 = load %a1 : $*Int
105+
end_access %a1 : $*Int
106+
%adr2 = ref_element_addr %0 : $TestEarlyBail, #TestEarlyBail.prop1
107+
%a2 = begin_access [modify] [dynamic] [no_nested_conflict] %adr2 : $*Int
108+
%v2 = load %a2 : $*Int
109+
end_access %a2 : $*Int
110+
return %0 : $TestEarlyBail
111+
}

0 commit comments

Comments
 (0)