Skip to content

Commit f6c008f

Browse files
authored
Merge pull request swiftlang#34115 from eeckstein/enum-optimizations
SILOptimizer: two optimization improvements for address-only enums
2 parents 327f4f8 + 0a71d0f commit f6c008f

File tree

7 files changed

+265
-20
lines changed

7 files changed

+265
-20
lines changed

lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,10 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
453453
return false;
454454

455455
EnumElementDecl *element = nullptr;
456+
unsigned numInits =0;
457+
unsigned numTakes = 0;
458+
SILBasicBlock *initBlock = nullptr;
459+
SILBasicBlock *takeBlock = nullptr;
456460
SILType payloadType;
457461

458462
// First step: check if the stack location is only used to hold one specific
@@ -463,6 +467,8 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
463467
case SILInstructionKind::DebugValueAddrInst:
464468
case SILInstructionKind::DestroyAddrInst:
465469
case SILInstructionKind::DeallocStackInst:
470+
case SILInstructionKind::InjectEnumAddrInst:
471+
// We'll check init_enum_addr below.
466472
break;
467473
case SILInstructionKind::InitEnumDataAddrInst: {
468474
auto *ieda = cast<InitEnumDataAddrInst>(user);
@@ -472,20 +478,17 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
472478
element = el;
473479
assert(!payloadType || payloadType == ieda->getType());
474480
payloadType = ieda->getType();
475-
break;
476-
}
477-
case SILInstructionKind::InjectEnumAddrInst: {
478-
auto *el = cast<InjectEnumAddrInst>(user)->getElement();
479-
if (element && el != element)
480-
return false;
481-
element = el;
481+
numInits++;
482+
initBlock = user->getParent();
482483
break;
483484
}
484485
case SILInstructionKind::UncheckedTakeEnumDataAddrInst: {
485486
auto *el = cast<UncheckedTakeEnumDataAddrInst>(user)->getElement();
486487
if (element && el != element)
487488
return false;
488489
element = el;
490+
numTakes++;
491+
takeBlock = user->getParent();
489492
break;
490493
}
491494
default:
@@ -495,6 +498,24 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
495498
if (!element || !payloadType)
496499
return false;
497500

501+
// If the enum has a single init-take pair in a single block, we know that
502+
// the enum cannot contain any valid payload outside that init-take pair.
503+
//
504+
// This also means that we can ignore any inject_enum_addr of another enum
505+
// case, because this can only inject a case without a payload.
506+
bool singleInitTakePair =
507+
(numInits == 1 && numTakes == 1 && initBlock == takeBlock);
508+
if (!singleInitTakePair) {
509+
// No single init-take pair: We cannot ignore inject_enum_addrs with a
510+
// mismatching case.
511+
for (auto *use : AS->getUses()) {
512+
if (auto *inject = dyn_cast<InjectEnumAddrInst>(use->getUser())) {
513+
if (inject->getElement() != element)
514+
return false;
515+
}
516+
}
517+
}
518+
498519
// Second step: replace the enum alloc_stack with a payload alloc_stack.
499520
auto *newAlloc = Builder.createAllocStack(
500521
AS->getLoc(), payloadType, AS->getVarInfo(), AS->hasDynamicLifetime());
@@ -508,6 +529,22 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
508529
eraseInstFromFunction(*user);
509530
break;
510531
case SILInstructionKind::DestroyAddrInst:
532+
if (singleInitTakePair) {
533+
// It's not possible that the enum has a payload at the destroy_addr,
534+
// because it must have already been taken by the take of the
535+
// single init-take pair.
536+
// We _have_ to remove the destroy_addr, because we also remove all
537+
// inject_enum_addrs which might inject a payload-less case before
538+
// the destroy_addr.
539+
eraseInstFromFunction(*user);
540+
} else {
541+
// The enum payload can still be valid at the destroy_addr, so we have
542+
// to keep the destroy_addr. Just replace the enum with the payload
543+
// (and because it's not a singleInitTakePair, we can be sure that the
544+
// enum cannot have any other case than the payload case).
545+
use->set(newAlloc);
546+
}
547+
break;
511548
case SILInstructionKind::DeallocStackInst:
512549
use->set(newAlloc);
513550
break;

lib/SILOptimizer/Transforms/SimplifyCFG.cpp

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -916,16 +916,57 @@ static bool couldRemoveRelease(SILBasicBlock *SrcBB, SILValue SrcV,
916916
return IsReleaseOfDest;
917917
}
918918

919+
/// Returns true if any instruction in \p block may write memory.
920+
static bool blockMayWriteMemory(SILBasicBlock *block) {
921+
for (auto instAndIdx : llvm::enumerate(*block)) {
922+
if (instAndIdx.value().mayWriteToMemory())
923+
return true;
924+
// Only look at the first 20 instructions to avoid compile time problems for
925+
// corner cases of very large blocks without memory writes.
926+
// 20 instructions is more than enough.
927+
if (instAndIdx.index() > 50)
928+
return true;
929+
}
930+
return false;
931+
}
932+
933+
// Returns true if \p block contains an injected an enum case into \p enumAddr
934+
// which is valid at the end of the block.
935+
static bool hasInjectedEnumAtEndOfBlock(SILBasicBlock *block, SILValue enumAddr) {
936+
for (auto instAndIdx : llvm::enumerate(llvm::reverse(*block))) {
937+
SILInstruction &inst = instAndIdx.value();
938+
if (auto *injectInst = dyn_cast<InjectEnumAddrInst>(&inst)) {
939+
return injectInst->getOperand() == enumAddr;
940+
}
941+
if (inst.mayWriteToMemory())
942+
return false;
943+
// Only look at the first 20 instructions to avoid compile time problems for
944+
// corner cases of very large blocks without memory writes.
945+
// 20 instructions is more than enough.
946+
if (instAndIdx.index() > 50)
947+
return false;
948+
}
949+
return false;
950+
}
951+
919952
/// tryJumpThreading - Check to see if it looks profitable to duplicate the
920953
/// destination of an unconditional jump into the bottom of this block.
921954
bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
922955
auto *DestBB = BI->getDestBB();
923956
auto *SrcBB = BI->getParent();
957+
TermInst *destTerminator = DestBB->getTerminator();
924958
// If the destination block ends with a return, we don't want to duplicate it.
925959
// We want to maintain the canonical form of a single return where possible.
926-
if (DestBB->getTerminator()->isFunctionExiting())
960+
if (destTerminator->isFunctionExiting())
927961
return false;
928962

963+
// Jump threading only makes sense if there is an argument on the branch
964+
// (which is reacted on in the DestBB), or if this goes through a memory
965+
// location (switch_enum_addr is the only adress-instruction which we
966+
// currently handle).
967+
if (BI->getArgs().empty() && !isa<SwitchEnumAddrInst>(destTerminator))
968+
return false;
969+
929970
// We don't have a great cost model at the SIL level, so we don't want to
930971
// blissly duplicate tons of code with a goal of improved performance (we'll
931972
// leave that to LLVM). However, doing limited code duplication can lead to
@@ -956,11 +997,29 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
956997
}
957998
}
958999

959-
if (ThreadingBudget == 0 && isa<CondBranchInst>(DestBB->getTerminator())) {
960-
for (auto V : BI->getArgs()) {
961-
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
1000+
if (ThreadingBudget == 0) {
1001+
if (isa<CondBranchInst>(destTerminator)) {
1002+
for (auto V : BI->getArgs()) {
1003+
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
1004+
ThreadingBudget = 4;
1005+
break;
1006+
}
1007+
}
1008+
} else if (auto *SEA = dyn_cast<SwitchEnumAddrInst>(destTerminator)) {
1009+
// If the branch-block injects a certain enum case and the destination
1010+
// switches on that enum, it's worth jump threading. E.g.
1011+
//
1012+
// inject_enum_addr %enum : $*Optional<T>, #Optional.some
1013+
// ... // no memory writes here
1014+
// br DestBB
1015+
// DestBB:
1016+
// ... // no memory writes here
1017+
// switch_enum_addr %enum : $*Optional<T>, case #Optional.some ...
1018+
//
1019+
SILValue enumAddr = SEA->getOperand();
1020+
if (!blockMayWriteMemory(DestBB) &&
1021+
hasInjectedEnumAtEndOfBlock(SrcBB, enumAddr)) {
9621022
ThreadingBudget = 4;
963-
break;
9641023
}
9651024
}
9661025
}
@@ -976,7 +1035,7 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
9761035
// control flow. Still, we make an exception for switch_enum.
9771036
bool DestIsLoopHeader = (LoopHeaders.count(DestBB) != 0);
9781037
if (DestIsLoopHeader) {
979-
if (!isa<SwitchEnumInst>(DestBB->getTerminator()))
1038+
if (!isa<SwitchEnumInst>(destTerminator))
9801039
return false;
9811040
}
9821041

@@ -1286,8 +1345,7 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
12861345
// If this unconditional branch has BBArgs, check to see if duplicating the
12871346
// destination would allow it to be simplified. This is a simple form of jump
12881347
// threading.
1289-
if (!isVeryLargeFunction && !BI->getArgs().empty() &&
1290-
tryJumpThreading(BI))
1348+
if (!isVeryLargeFunction && tryJumpThreading(BI))
12911349
return true;
12921350

12931351
return Simplified;

test/SILOptimizer/cast_optimizer_conditional_conformance.sil

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,6 @@ bb6:
9898
// CHECK: [[IEDA:%.*]] = init_enum_data_addr [[OPT]] : $*Optional<HasFoo>, #Optional.some!enumelt
9999
// CHECK: checked_cast_addr_br take_always S1<Int8> in [[VAL]] : $*S1<Int8> to HasFoo in [[IEDA]] : $*HasFoo, bb1, bb2
100100
// bbs...
101-
// CHECK: switch_enum_addr [[OPT]] : $*Optional<HasFoo>, case #Optional.some!enumelt: bb4, case #Optional.none!enumelt: bb5
102-
// bbs...
103101
// CHECK: [[UNWRAP:%.*]] = unchecked_take_enum_data_addr [[OPT]] : $*Optional<HasFoo>, #Optional.some!enumelt
104102
// CHECK: copy_addr [take] [[UNWRAP]] to [initialization] [[EXIS]] : $*HasFoo
105103
// CHECK: [[OPEN:%.*]] = open_existential_addr immutable_access [[EXIS]] : $*HasFoo to $*@opened("4E16CBC0-FD9F-11E8-A311-D0817AD9F6DD") HasFoo

test/SILOptimizer/enum_jump_thread.sil

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,83 @@ bb5(%7 : $E2):
4949
return %7 : $E2
5050
}
5151

52+
// CHECK-LABEL: sil @test_enum_addr
53+
sil @test_enum_addr : $@convention(thin) () -> Builtin.Int32 {
54+
bb0:
55+
%2 = alloc_stack $E
56+
cond_br undef, bb1, bb2
57+
58+
// CHECK: bb1:
59+
// CHECK-NEXT: inject_enum_addr
60+
// CHECK-NEXT: switch_enum_addr
61+
bb1:
62+
inject_enum_addr %2 : $*E, #E.A!enumelt
63+
br bb3
64+
65+
// CHECK: bb2:
66+
// CHECK-NEXT: inject_enum_addr
67+
// CHECK-NEXT: switch_enum_addr
68+
bb2:
69+
inject_enum_addr %2 : $*E, #E.B!enumelt
70+
br bb3
71+
72+
bb3:
73+
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5
74+
75+
bb4:
76+
%10 = integer_literal $Builtin.Int32, 1
77+
br bb6(%10 : $Builtin.Int32)
78+
79+
bb5:
80+
%11 = integer_literal $Builtin.Int32, 2
81+
br bb6(%11 : $Builtin.Int32)
82+
83+
bb6(%12 : $Builtin.Int32):
84+
dealloc_stack %2 : $*E
85+
return %12 : $Builtin.Int32
86+
// CHECK: } // end sil function 'test_enum_addr'
87+
}
88+
89+
// CHECK-LABEL: sil @dont_jumpthread_enum_addr
90+
sil @dont_jumpthread_enum_addr : $@convention(thin) (E) -> Builtin.Int32 {
91+
bb0(%0 : $E):
92+
%2 = alloc_stack $E
93+
%3 = alloc_stack $E
94+
cond_br undef, bb1, bb2
95+
96+
// CHECK: bb1:
97+
// CHECK-NEXT: inject_enum_addr
98+
// CHECK-NEXT: store
99+
// CHECK-NEXT: br bb3
100+
bb1:
101+
inject_enum_addr %2 : $*E, #E.A!enumelt
102+
store %0 to %2 : $*E
103+
br bb3
104+
105+
// CHECK: bb2:
106+
// CHECK-NEXT: inject_enum_addr
107+
// CHECK-NEXT: br bb3
108+
bb2:
109+
inject_enum_addr %3 : $*E, #E.A!enumelt
110+
br bb3
111+
112+
// CHECK: bb3:
113+
// CHECK-NEXT: switch_enum_addr
114+
bb3:
115+
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5
116+
117+
bb4:
118+
%10 = integer_literal $Builtin.Int32, 1
119+
br bb6(%10 : $Builtin.Int32)
120+
121+
bb5:
122+
%11 = integer_literal $Builtin.Int32, 2
123+
br bb6(%11 : $Builtin.Int32)
124+
125+
bb6(%12 : $Builtin.Int32):
126+
dealloc_stack %3 : $*E
127+
dealloc_stack %2 : $*E
128+
return %12 : $Builtin.Int32
129+
// CHECK: } // end sil function 'dont_jumpthread_enum_addr'
130+
}
131+

test/SILOptimizer/generic_loop.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %target-swift-frontend -primary-file %s -O -sil-verify-all -module-name=test -emit-sil | %FileCheck %s
2+
3+
// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib
4+
5+
6+
// Check that we can eliminate all optionals from a loop which is iterating
7+
// over an array of address-only elements.
8+
9+
// CHECK-LABEL: sil @$s4test0A18_no_optionals_usedyySayxGlF : $@convention(thin) <T> (@guaranteed Array<T>) -> () {
10+
// CHECK-NOT: Optional
11+
// CHECK: } // end sil function '$s4test0A18_no_optionals_usedyySayxGlF'
12+
public func test_no_optionals_used<T>(_ items: [T]) {
13+
for i in items {
14+
print(i)
15+
}
16+
}
17+

test/SILOptimizer/sil_combine_concrete_existential.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,11 @@ struct SS: PPP {
9595

9696
// The first apply has been devirtualized and inlined. The second remains unspecialized.
9797
// CHECK-LABEL: sil @$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF : $@convention(thin) () -> () {
98+
// CHECK: [[O1:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
9899
// CHECK: switch_enum_addr %{{.*}} : $*Optional<@opened("{{.*}}") PPP>, case #Optional.some!enumelt: bb{{.*}}, case #Optional.none!enumelt: bb{{.*}}
99-
// CHECK: [[O:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
100-
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
101-
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
100+
// CHECK: [[O2:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
101+
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O1]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
102+
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O2]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
102103
// CHECK-LABEL: } // end sil function '$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF'
103104
public func testWitnessReturnOptionalIndirectSelf() {
104105
let p: PPP = S()

0 commit comments

Comments
 (0)