Commit 92a17f8

Optimizer: extract the NamedReturnValueOptimization from CopyForwarding to a separate function pass
This allows running the NamedReturnValueOptimization only late in the pipeline. The optimization shouldn't be done before serialization because it might prevent predictable memory optimizations in the caller after inlining.
1 parent 08e75f2 commit 92a17f8
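
For context, the source-level shape of a named return value that this pass targets looks roughly like the function below. This is an illustrative sketch adapted from the example in the doc comment that this commit removes from CopyForwarding.cpp; the protocol P and the literal values are placeholders, not part of the change itself.

protocol P {
  init(_ i: Int32)
}

// `rvo` is a generic (address-only) local, so it is allocated with an alloc_stack
// and copied into the caller-provided @out argument at the return. After this
// pass, the initializers in both branches write directly into the @out argument,
// making that final copy_addr (and the local alloc_stack) removable.
func nrvo<T: P>(z: Bool) -> T {
  var rvo: T
  if z {
    rvo = T(10)
  } else {
    rvo = T(1)
  }
  return rvo
}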

8 files changed: +285 additions, -177 deletions

SwiftCompilerSources/Sources/Optimizer/FunctionPasses/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ swift_compiler_sources(Optimizer
   InitializeStaticGlobals.swift
   ObjCBridgingOptimization.swift
   MergeCondFails.swift
+  NamedReturnValueOptimization.swift
   ReleaseDevirtualizer.swift
   SimplificationPasses.swift
   StackPromotion.swift
SwiftCompilerSources/Sources/Optimizer/FunctionPasses/NamedReturnValueOptimization.swift

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+//===--- NamedReturnValueOptimization.swift --------------------------------==//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2014 - 2023 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+import SIL
+
+/// Removes a `copy_addr` to an indirect out argument by replacing the source of the copy
+/// (which must be an `alloc_stack`) with the out argument itself.
+///
+/// The following SIL pattern will be optimized:
+///
+///   sil @foo : $@convention(thin) <T> () -> @out T {
+///   bb0(%0 : $*T):
+///     %2 = alloc_stack $T
+///     ...
+///     copy_addr %some_value to [init] %2     // or any other writes to %2
+///     ...
+///   bbN:
+///     copy_addr [take] %2 to [init] %0 : $*T // the only use of %0
+///     ...                                    // no writes
+///     return
+///
+/// to:
+///
+///   sil @foo : $@convention(thin) <T> (@out T) -> () {
+///   bb0(%0 : $*T):
+///     %2 = alloc_stack $T                    // is dead now
+///     ...
+///     copy_addr %some_value to [init] %0
+///     ...
+///   bbN:
+///     ...
+///     return
+///
+/// This optimization can be done because we know that:
+/// * The out argument dominates all uses of the copy_addr's source (because it's a function argument).
+/// * It's not aliased (by definition). We can't allow aliases to be accessed between the initialization and the return.
+///
+/// This pass shouldn't run before serialization. It might prevent predictable memory optimizations
+/// in a caller after inlining, because the memory location (the out argument = an alloc_stack in the caller)
+/// might be written multiple times after this optimization.
+///
+let namedReturnValueOptimization = FunctionPass(name: "named-return-value-optimization") {
+  (function: Function, context: FunctionPassContext) in
+
+  for outArg in function.arguments[0..<function.numIndirectResultArguments] {
+    if let copyToArg = findCopyForNRVO(for: outArg) {
+      performNRVO(with: copyToArg, context)
+    }
+  }
+}
+
+/// Returns a copy_addr which copies from an alloc_stack to the `outArg` at the end of the function.
+///
+private func findCopyForNRVO(for outArg: FunctionArgument) -> CopyAddrInst? {
+  guard let singleArgUse = outArg.uses.singleNonDebugUse,
+        let copyToArg = singleArgUse.instruction as? CopyAddrInst else {
+    return nil
+  }
+
+  assert(singleArgUse == copyToArg.destinationOperand,
+         "single use of out-argument cannot be the source of a copy")
+
+  // Don't perform NRVO unless the copy is a [take]. This is the easiest way
+  // to determine that the local variable has ownership of its value and ensures
+  // that removing a copy is a reference count neutral operation. For example,
+  // this copy can't be trivially eliminated without adding a retain.
+  //   sil @f : $@convention(thin) (@guaranteed T) -> @out T
+  //   bb0(%in : $*T, %out : $T):
+  //     %local = alloc_stack $T
+  //     store %in to %local : $*T
+  //     copy_addr %local to [init] %out : $*T
+  if !copyToArg.isTakeOfSrc {
+    return nil
+  }
+
+  guard let sourceStackAlloc = copyToArg.source as? AllocStackInst else {
+    return nil
+  }
+
+  // NRVO for alloc_stack [dynamic_lifetime] will invalidate OSSA invariants.
+  if sourceStackAlloc.hasDynamicLifetime && copyToArg.parentFunction.hasOwnership {
+    return nil
+  }
+
+  if !(copyToArg.parentBlock.terminator is ReturnInst) {
+    return nil
+  }
+
+  // This check is overly conservative, because we only need to check if the source
+  // of the copy is not written to. But the copy to the out argument is usually the last
+  // instruction of the function, so it doesn't matter.
+  if isAnyInstructionWritingToMemory(after: copyToArg) {
+    return nil
+  }
+
+  return copyToArg
+}
+
+private func performNRVO(with copy: CopyAddrInst, _ context: FunctionPassContext) {
+  copy.source.uses.replaceAllExceptDealloc(with: copy.destination, context)
+  assert(copy.source == copy.destination)
+  context.erase(instruction: copy)
+}
+
+private func isAnyInstructionWritingToMemory(after: Instruction) -> Bool {
+  var followingInst = after.next
+  while let fi = followingInst {
+    if fi.mayWriteToMemory && !(fi is DeallocStackInst) {
+      return true
+    }
+    followingInst = fi.next
+  }
+  return false
+}
+
+private extension UseList {
+  func replaceAllExceptDealloc(with replacement: Value, _ context: some MutatingContext) {
+    for use in self where !(use.instruction is Deallocation) {
+      use.set(to: replacement, context)
+    }
+  }
+}

SwiftCompilerSources/Sources/Optimizer/PassManager/PassRegistration.swift

Lines changed: 1 addition & 0 deletions
@@ -78,6 +78,7 @@ private func registerSwiftPasses() {
   registerPass(ononeSimplificationPass, { ononeSimplificationPass.run($0) })
   registerPass(lateOnoneSimplificationPass, { lateOnoneSimplificationPass.run($0) })
   registerPass(cleanupDebugStepsPass, { cleanupDebugStepsPass.run($0) })
+  registerPass(namedReturnValueOptimization, { namedReturnValueOptimization.run($0) })

   // Instruction passes
   registerForSILCombine(BeginCOWMutationInst.self, { run(BeginCOWMutationInst.self, $0) })

include/swift/SILOptimizer/PassManager/Passes.def

Lines changed: 2 additions & 0 deletions
@@ -400,6 +400,8 @@ SWIFT_FUNCTION_PASS(LateOnoneSimplification, "late-onone-simplification",
                     "Peephole simplifications which can only run late in the -Onone pipeline")
 SWIFT_FUNCTION_PASS(CleanupDebugSteps, "cleanup-debug-steps",
                     "Cleanup debug_step instructions for Onone")
+SWIFT_FUNCTION_PASS(NamedReturnValueOptimization, "named-return-value-optimization",
+                    "Optimize copies to an indirect return value")
 PASS(SimplifyBBArgs, "simplify-bb-args",
      "SIL Block Argument Simplification")
 PASS(SimplifyCFG, "simplify-cfg",

lib/SILOptimizer/PassManager/PassPipeline.cpp

Lines changed: 4 additions & 0 deletions
@@ -758,6 +758,10 @@ static void addLowLevelPassPipeline(SILPassPipelinePlan &P) {

   addFunctionPasses(P, OptimizationLevelKind::LowLevel);

+  // The NamedReturnValueOptimization shouldn't be done before serialization.
+  // For details see the comment for `namedReturnValueOptimization`.
+  P.addNamedReturnValueOptimization();
+
   P.addDeadObjectElimination();
   P.addObjectOutliner();
   P.addDeadStoreElimination();

lib/SILOptimizer/Transforms/CopyForwarding.cpp

Lines changed: 0 additions & 121 deletions
@@ -75,7 +75,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"

-STATISTIC(NumCopyNRVO, "Number of copies removed via named return value opt.");
 STATISTIC(NumCopyForward, "Number of copies removed via forward propagation");
 STATISTIC(NumCopyBackward,
           "Number of copies removed via backward propagation");

@@ -1213,114 +1212,6 @@ void CopyForwarding::forwardCopiesOf(SILValue Def, SILFunction *F) {
   }
 }

-//===----------------------------------------------------------------------===//
-// Named Return Value Optimization
-//===----------------------------------------------------------------------===//
-
-/// Return true if this copy can be eliminated through Named Return Value
-/// Optimization (NRVO).
-///
-/// Simple NRVO cases are handled naturally via backwardPropagateCopy. However,
-/// general NRVO is not handled via local propagation without global data
-/// flow. Nonetheless, NRVO is a simple pattern that can be detected using a
-/// different technique from propagation.
-///
-/// Example:
-///   func nrvo<T : P>(z : Bool) -> T {
-///     var rvo : T
-///     if (z) {
-///       rvo = T(10)
-///     }
-///     else {
-///       rvo = T(1)
-///     }
-///     return rvo
-///   }
-///
-/// Because of the control flow, backward propagation with a block will fail to
-/// find the initializer for the copy at "return rvo". Instead, we directly
-/// check for an NRVO pattern by observing a copy in a return block that is the
-/// only use of the copy's dest, which must be an @out arg. If there are no
-/// instructions between the copy and the return that may write to the copy's
-/// source, we simply replace the source's local stack address with the @out
-/// address.
-///
-/// The following SIL pattern will be detected:
-///
-///   sil @foo : $@convention(thin) <T> (@out T) -> () {
-///   bb0(%0 : $*T):
-///     %2 = alloc_stack $T
-///     ... // arbitrary control flow, but no other uses of %0
-///   bbN:
-///     copy_addr [take] %2 to [init] %0 : $*T
-///     ... // no writes
-///     return
-static bool canNRVO(CopyAddrInst *CopyInst) {
-  // Don't perform NRVO unless the copy is a [take]. This is the easiest way
-  // to determine that the local variable has ownership of its value and ensures
-  // that removing a copy is a reference count neutral operation. For example,
-  // this copy can't be trivially eliminated without adding a retain.
-  //   sil @f : $@convention(thin) (@guaranteed T) -> @out T
-  //   bb0(%in : $*T, %out : $T):
-  //     %local = alloc_stack $T
-  //     store %in to %local : $*T
-  //     copy_addr %local to [init] %out : $*T
-  if (!CopyInst->isTakeOfSrc())
-    return false;
-
-  auto *asi = dyn_cast<AllocStackInst>(CopyInst->getSrc());
-  if (!asi || asi->hasDynamicLifetime())
-    return false;
-
-  // The copy's dest must be an indirect SIL argument. Otherwise, it may not
-  // dominate all uses of the source. Worse, it may be aliased. This
-  // optimization will early-initialize the copy dest, so we can't allow aliases
-  // to be accessed between the initialization and the return.
-  auto OutArg = dyn_cast<SILFunctionArgument>(CopyInst->getDest());
-  if (!OutArg)
-    return false;
-
-  if (!OutArg->isIndirectResult())
-    return false;
-
-  SILBasicBlock *BB = CopyInst->getParent();
-  if (!isa<ReturnInst>(BB->getTerminator()))
-    return false;
-
-  SILValue CopyDest = CopyInst->getDest();
-  if (!hasOneNonDebugUse(CopyDest))
-    return false;
-
-  auto SI = CopyInst->getIterator(), SE = BB->end();
-  for (++SI; SI != SE; ++SI) {
-    if (SI->mayWriteToMemory() && !isa<DeallocationInst>(SI))
-      return false;
-  }
-  return true;
-}
-
-/// Replace all uses of \p ASI by \p RHS, except the dealloc_stack.
-static void replaceAllUsesExceptDealloc(AllocStackInst *ASI, ValueBase *RHS) {
-  llvm::SmallVector<Operand *, 8> Uses;
-  for (Operand *Use : ASI->getUses()) {
-    if (!isa<DeallocStackInst>(Use->getUser()))
-      Uses.push_back(Use);
-  }
-  for (Operand *Use : Uses) {
-    Use->set(RHS);
-  }
-}
-
-/// Remove a copy for which canNRVO returned true.
-static void performNRVO(CopyAddrInst *CopyInst) {
-  LLVM_DEBUG(llvm::dbgs() << "NRVO eliminates copy" << *CopyInst);
-  ++NumCopyNRVO;
-  replaceAllUsesExceptDealloc(cast<AllocStackInst>(CopyInst->getSrc()),
-                              CopyInst->getDest());
-  assert(CopyInst->getSrc() == CopyInst->getDest() && "bad NRVO");
-  CopyInst->eraseFromParent();
-}
-
 //===----------------------------------------------------------------------===//
 // CopyForwardingPass
 //===----------------------------------------------------------------------===//

@@ -1361,16 +1252,10 @@ class CopyForwardingPass : public SILFunctionTransform

     // Collect a set of identified objects (@in arg or alloc_stack) that are
     // copied in this function.
-    // Collect a separate set of copies that can be removed via NRVO.
     llvm::SmallSetVector<SILValue, 16> CopiedDefs;
-    llvm::SmallVector<CopyAddrInst*, 4> NRVOCopies;
     for (auto &BB : *getFunction())
       for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) {
         if (auto *CopyInst = dyn_cast<CopyAddrInst>(&*II)) {
-          if (canNRVO(CopyInst)) {
-            NRVOCopies.push_back(CopyInst);
-            continue;
-          }
           SILValue Def = CopyInst->getSrc();
           if (isIdentifiedSourceValue(Def))
             CopiedDefs.insert(Def);

@@ -1381,12 +1266,6 @@ class CopyForwardingPass : public SILFunctionTransform
       }
     }

-    // Perform NRVO
-    for (auto Copy : NRVOCopies) {
-      performNRVO(Copy);
-      invalidateAnalysis(SILAnalysis::InvalidationKind::CallsAndInstructions);
-    }
-
     // Perform Copy Forwarding.
     if (CopiedDefs.empty())
       return;
test/SILOptimizer/copyforward_ossa.sil

Lines changed: 0 additions & 56 deletions
@@ -20,44 +20,6 @@ sil @f_in_guaranteed : $@convention(thin) <T> (@in_guaranteed T) -> ()
 sil @f_out : $@convention(thin) <T> () -> @out T
 sil @f_owned : $@convention(thin) <T> (@owned T) -> ()

-protocol P {
-  init(_ i : Int32)
-  mutating func poke()
-};
-
-// CHECK-LABEL: sil hidden [ossa] @nrvo :
-// CHECK-NOT: copy_addr
-// CHECK-LABEL: } // end sil function 'nrvo'
-sil hidden [ossa] @nrvo : $@convention(thin) <T where T : P> (Bool) -> @out T {
-bb0(%0 : $*T, %1 : $Bool):
-  %2 = alloc_stack $T, var, name "ro" // users: %9, %15, %17, %19
-  %3 = struct_extract %1 : $Bool, #Bool._value // user: %4
-  cond_br %3, bb1, bb2 // id: %4
-
-bb1: // Preds: bb0
-  %5 = metatype $@thick T.Type // user: %9
-  %6 = witness_method $T, #P.init!allocator : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (Int32, @thick τ_0_0.Type) -> @out τ_0_0 // user: %9
-  %7 = integer_literal $Builtin.Int32, 10 // user: %8
-  %8 = struct $Int32 (%7 : $Builtin.Int32) // user: %9
-  %9 = apply %6<T>(%2, %8, %5) : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (Int32, @thick τ_0_0.Type) -> @out τ_0_0
-  br bb3 // id: %10
-
-bb2: // Preds: bb0
-  %11 = metatype $@thick T.Type // user: %15
-  %12 = witness_method $T, #P.init!allocator : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (Int32, @thick τ_0_0.Type) -> @out τ_0_0 // user: %15
-  %13 = integer_literal $Builtin.Int32, 1 // user: %14
-  %14 = struct $Int32 (%13 : $Builtin.Int32) // user: %15
-  %15 = apply %12<T>(%2, %14, %11) : $@convention(witness_method: P) <τ_0_0 where τ_0_0 : P> (Int32, @thick τ_0_0.Type) -> @out τ_0_0
-  br bb3 // id: %16
-
-bb3: // Preds: bb1 bb2
-  copy_addr [take] %2 to [init] %0 : $*T // id: %17
-  %18 = tuple () // user: %20
-  debug_value %0 : $*T, expr op_deref
-  dealloc_stack %2 : $*T // id: %19
-  return %18 : $() // id: %20
-}
-
 // CHECK-LABEL: sil hidden [ossa] @forward_takeinit :
 // CHECK-NOT: copy_addr
 // CHECK-NOT: destroy_addr

@@ -312,24 +274,6 @@ bb3:
   return %13 : $()
 }

-// CHECK-LABEL: sil hidden [ossa] @forward_unchecked_ref_cast_addr :
-// CHECK: unchecked_ref_cast_addr
-// CHECK-NOT: copy_addr
-// CHECK-LABEL: } // end sil function 'forward_unchecked_ref_cast_addr'
-sil hidden [ossa] @forward_unchecked_ref_cast_addr : $@convention(thin) (@in AnyObject) -> @out AClass {
-bb0(%0 : $*AClass, %1 : $*AnyObject):
-  %3 = alloc_stack $AnyObject // user: %10
-  %4 = alloc_stack $AnyObject // user: %9
-  %5 = alloc_stack $AClass // users: %6, %7, %8
-  unchecked_ref_cast_addr AnyObject in %1 : $*AnyObject to AClass in %5 : $*AClass // id: %6
-  copy_addr [take] %5 to [init] %0 : $*AClass // id: %7
-  dealloc_stack %5 : $*AClass // id: %8
-  dealloc_stack %4 : $*AnyObject // id: %9
-  dealloc_stack %3 : $*AnyObject // id: %10
-  %11 = tuple () // user: %12
-  return %11 : $() // id: %12
-}
-
 public struct S<T> {
   @_hasStorage var f: T { get set }
   @_hasStorage var g: T { get set }
