Skip to content

Commit 5153b34

Browse files
committed
Optimizer: improve TempLValueOpt
* re-implement the pass in swift * support alloc_stack liveranges which span over multiple basic blocks * support `load`-`store` pairs, copying from the alloc_stack (in addition to `copy_addr`) Those improvements help to reduce temporary stack allocations, especially for InlineArrays. rdar://151606382
1 parent e6fb8e4 commit 5153b34

File tree

14 files changed

+580
-413
lines changed

14 files changed

+580
-413
lines changed

SwiftCompilerSources/Sources/Optimizer/FunctionPasses/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@ swift_compiler_sources(Optimizer
3333
SimplificationPasses.swift
3434
StackPromotion.swift
3535
StripObjectHeaders.swift
36+
TempLValueElimination.swift
3637
TempRValueElimination.swift
3738
)
Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
//===--- TempLValueElimination.swift ---------------------------------------==//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import AST
14+
import SIL
15+
16+
/// Eliminates copies from a temporary (an "l-value") to a destination.
17+
///
18+
/// ```
19+
/// %temp = alloc_stack $T
20+
/// ... -+
21+
/// store %x to %temp | no reads or writes to %destination
22+
/// ... -+
23+
/// copy_addr [take] %temp to [init] %destination
24+
/// dealloc_stack %temp
25+
/// ```
26+
/// ->
27+
/// ```
28+
/// ...
29+
/// store %x to %destination
30+
/// ...
31+
/// ```
32+
///
33+
/// The name TempLValueElimination refers to the TempRValueElimination pass, which performs
34+
/// a related transformation, just with the temporary on the "right" side.
35+
///
36+
/// The pass also performs a peephole optimization on `copy_addr` - `destroy_addr` sequences.
37+
/// It replaces
38+
///
39+
/// ```
40+
/// copy_addr %source to %destination
41+
/// destroy_addr %source
42+
/// ```
43+
/// ->
44+
/// ```
45+
/// copy_addr [take] %source to %destination
46+
/// ```
47+
///
48+
let tempLValueElimination = FunctionPass(name: "temp-lvalue-elimination") {
  (function: Function, context: FunctionPassContext) in

  // Visit every instruction of the function and try to optimize the two kinds of "copies"
  // this pass knows about: `copy_addr` instructions and `load`-`store` pairs.
  for instruction in function.instructions {
    if let copyAddr = instruction as? CopyAddrInst {
      // First try the `copy_addr` - `destroy_addr` peephole, then try to eliminate the temporary.
      combineWithDestroy(copy: copyAddr, context)
      tryEliminate(copy: copyAddr, context)
    } else if let store = instruction as? StoreInst,
              // A `load`-`store` pair is basically the same thing as a `copy_addr`. It is only handled
              // if the load has a single use and is located in the same block as the store.
              let load = store.source as? LoadInst,
              load.uses.isSingleUse,
              load.parentBlock == store.parentBlock
    {
      tryEliminate(copy: store, context)
    }
  }
}
66+
67+
/// Tries to remove the temporary `alloc_stack` which is the source of `copy` by replacing all uses
/// of the temporary with the destination address of `copy`.
///
/// Nothing is changed if any of the checks below fails.
private func tryEliminate(copy: CopyLikeInstruction, _ context: FunctionPassContext) {
  guard let temporary = copy.sourceAddress as? AllocStackInst,
        temporary.isDeallocatedInSameBlock(as: copy)
  else {
    return
  }
  // Only a take of the source or a copy of a trivial value is handled.
  let isTrivial = temporary.type.isTrivial(in: copy.parentFunction)
  if !copy.isTakeOfSource && !isTrivial {
    return
  }

  let destination = copy.destinationAddress

  // All address projections of the destination must be moved to the beginning of the alloc_stack
  // liverange, because the alloc_stack uses are replaced with the destination.
  // ```
  //                                            %destination = struct_element_addr %1
  //   stores to %temp                   -->    stores to %destination
  //   %destination = struct_element_addr %1
  //   copy_addr [take] %temp to %destination
  // ```
  var projections = InstructionSet(context)
  defer { projections.deinitialize() }
  let destinationRoot = collectMovableProjections(of: destination, in: &projections)

  // If true, the destination must be explicitly destroyed at the beginning of the liverange.
  // ```
  //                                            destroy_addr %destination
  //   stores to %temp                   -->    stores to %destination
  //   copy_addr [take] %temp to %destination
  // ```
  let needDestroyEarly = !copy.isInitializationOfDestination && !isTrivial

  let aliasAnalysis = context.aliasAnalysis
  let calleeAnalysis = context.calleeAnalysis

  // Catch the very unusual corner case where the copy is writing back to its source address,
  // i.e. the alloc_stack.
  if aliasAnalysis.mayAlias(temporary, destination) {
    return
  }

  var worklist = InstructionWorklist(context)
  defer { worklist.deinitialize() }
  worklist.pushIfNotVisited(temporary)

  var liverangeBegin: Instruction? = nil

  // Walk over the instructions within the liverange of the alloc_stack.
  while let inst = worklist.pop() {

    // The liverange actually begins at the first use of the alloc_stack. For simplicity, it begins
    // at the end of the first block at latest.
    if liverangeBegin == nil && (inst.isUsing(temporary) || inst is TermInst) {
      liverangeBegin = inst
    }

    if liverangeBegin != nil {
      // A destination root address which is defined within the liverange would prevent moving the
      // projections before the first use. Note that if the defining instruction of the root address
      // is nil it can only be a function argument.
      if inst == destinationRoot.definingInstruction {
        return
      }

      // The destination must not be accessed within the liverange of the temporary.
      // This is unlikely, because the destination is initialized at the copy. But still, the
      // destination could contain an initialized value which is destroyed before the copy.
      // The projection set is consulted to treat `init_existential_addr` as a not-writing projection.
      if inst.mayReadOrWrite(address: destination, aliasAnalysis) && !projections.contains(inst) {
        return
      }

      // Replacing the alloc_stack with the destination must not invalidate the alias rules of
      // indirect arguments.
      if let apply = inst as? FullApplySite,
         apply.hasInvalidArgumentAliasing(between: temporary, and: destination, aliasAnalysis)
      {
        return
      }

      // The liverange of an existing value in the destination must not be shrunk.
      if needDestroyEarly && inst.isDeinitBarrier(calleeAnalysis) {
        return
      }
    }

    worklist.pushSuccessors(of: inst, ignoring: copy)
  }

  // If no first use was found, the alloc_stack is not written yet at the point of the copy. This is a
  // very unusual corner case which can only happen if the alloc_stack has an empty type (e.g. `$()`).
  guard let firstUse = liverangeBegin else {
    return
  }

  // Bail in the unlikely case that the alloc_stack is re-initialized after its value has been taken
  // by `copy`.
  if temporary.isReadOrWritten(after: copy.loadingInstruction, aliasAnalysis) {
    return
  }

  moveProjections(of: destination, within: worklist, before: firstUse, context)

  if needDestroyEarly {
    // Make sure the destination is uninitialized before the liverange of the temporary.
    let builder = Builder(before: firstUse, context)
    builder.createDestroyAddr(address: destination)
  }

  // Replace all uses of the temporary with the destination address. The `dealloc_stack` is not
  // needed anymore and is removed.
  for use in temporary.uses {
    if let deallocStack = use.instruction as? DeallocStackInst {
      context.erase(instruction: deallocStack)
    } else {
      use.set(to: destination, context)
    }
  }
  context.erase(instruction: temporary)
  context.erase(instructionIncludingAllUsers: copy.loadingInstruction)
}
186+
187+
private extension FullApplySite {
  /// Returns true if replacing `addr1` with `addr2` would give this apply invalid aliasing of
  /// indirect arguments.
  ///
  /// An indirect argument (except `@inout_aliasable`) must not alias with another indirect argument.
  /// For example, replacing `addr1` with `addr2` in
  /// ```
  ///   apply %f(%addr1, %addr2) : (@in T) -> @out T
  /// ```
  /// would invalidate this rule.
  func hasInvalidArgumentAliasing(between addr1: Value, and addr2: Value, _ aliasAnalysis: AliasAnalysis) -> Bool {
    var addr1Accessed = false
    var addr2Accessed = false
    var mutatingAccess = false

    for operand in argumentOperands {
      let argConvention = convention(of: operand)!
      guard argConvention.isExclusiveIndirect else {
        continue
      }

      // An argument is attributed to `addr1` first. Only if it does not alias `addr1` it is checked
      // against `addr2`.
      if aliasAnalysis.mayAlias(addr1, operand.value) {
        addr1Accessed = true
      } else if aliasAnalysis.mayAlias(addr2, operand.value) {
        addr2Accessed = true
      } else {
        continue
      }

      // Any non-guaranteed convention of an aliasing argument counts as mutating access.
      if !argConvention.isGuaranteed {
        mutatingAccess = true
      }
    }
    return mutatingAccess && addr1Accessed && addr2Accessed
  }
}
219+
220+
/// Replace
/// ```
///   copy_addr %source to %destination   -->   copy_addr [take] %source to %destination
///   destroy_addr %source
/// ```
///
/// The transformation is only done if
/// * the `copy_addr` is not already a take of its source,
/// * a `destroy_addr` of the source is located in the same block _after_ the `copy_addr`, and
/// * no instruction between the `copy_addr` and the `destroy_addr` may read or write memory.
///
/// `debug_value` instructions of the source which are located between the two instructions are removed.
private func combineWithDestroy(copy: CopyAddrInst, _ context: FunctionPassContext) {
  guard !copy.isTakeOfSource,
        let destroy = copy.source.uses.users(ofType: DestroyAddrInst.self).first,
        destroy.parentBlock == copy.parentBlock
  else {
    return
  }

  // Check if the destroy_addr is after the copy_addr and if there are no memory accesses between them.
  var debugInsts = Stack<DebugValueInst>(context)
  defer { debugInsts.deinitialize() }

  var destroyFollowsCopy = false
  for inst in InstructionList(first: copy.next) {
    if inst == destroy {
      destroyFollowsCopy = true
      break
    }
    if let debugInst = inst as? DebugValueInst, debugInst.operand.value == copy.source {
      debugInsts.append(debugInst)
    }
    if inst.mayReadOrWriteMemory {
      return
    }
  }

  guard destroyFollowsCopy else {
    // The end of the block was reached without finding the destroy_addr. This means that the
    // destroy_addr is located _before_ the copy_addr and destroys a different value of the source.
    // Combining it with the copy would be wrong.
    return
  }

  copy.set(isTakeOfSource: true, context)
  context.erase(instruction: destroy)
  // Don't let debug info think that the value is still valid after the `copy [take]`.
  context.erase(instructions: debugInsts)
}
253+
254+
private extension Value {
  /// Returns this value as projection instruction if it is one of the address projection kinds
  /// which this pass moves to the beginning of the temporary's liverange. Returns nil otherwise.
  var isMovableProjection: (SingleValueInstruction & UnaryInstruction)? {
    guard let candidate = self as? (SingleValueInstruction & UnaryInstruction) else {
      return nil
    }
    switch candidate {
    case is InitEnumDataAddrInst,
         is StructElementAddrInst,
         is TupleElementAddrInst,
         is UncheckedTakeEnumDataAddrInst,
         is InitExistentialAddrInst,
         is RefElementAddrInst,
         is RefTailAddrInst,
         is ProjectBoxInst:
      return candidate
    default:
      return nil
    }
  }
}
269+
270+
/// Inserts all movable projection instructions in the projection chain of `address` into `projections`
/// and returns the root of the chain, i.e. the first value which is not a movable projection.
private func collectMovableProjections(of address: Value, in projections: inout InstructionSet) -> Value {
  guard let projection = address.isMovableProjection else {
    // The end of the projection chain is reached.
    return address
  }
  projections.insert(projection)
  return collectMovableProjections(of: projection.operand.value, in: &projections)
}
278+
279+
/// Moves the movable projection instructions in the projection chain of `address` before
/// `insertionPoint`, as long as they have been pushed to `worklist`.
///
/// The chain is processed from `address` towards its root. Each projection is inserted before the
/// previously moved one, so that the operand of a projection stays in front of its user.
private func moveProjections(
  of address: Value,
  within worklist: InstructionWorklist,
  before insertionPoint: Instruction,
  _ context: FunctionPassContext
) {
  guard let projection = address.isMovableProjection,
        worklist.hasBeenPushed(projection)
  else {
    // Stop at the first value which is not a movable projection or which was not pushed to the worklist.
    return
  }
  projection.move(before: insertionPoint, context)
  moveProjections(of: projection.operand.value, within: worklist, before: projection, context)
}
295+
296+
private extension AllocStackInst {
  /// Returns true if any instruction between `afterInst` and the `dealloc_stack` of this alloc_stack
  /// may read or write the allocated memory.
  ///
  /// The `dealloc_stack` must be located after `afterInst` in the same block.
  func isReadOrWritten(after afterInst: Instruction, _ aliasAnalysis: AliasAnalysis) -> Bool {
    for inst in InstructionList(first: afterInst.next) {
      switch inst {
      case let deallocStack as DeallocStackInst where deallocStack.allocatedValue == self:
        // The end of the liverange is reached without finding an access.
        return false
      default:
        if inst.mayReadOrWrite(address: self, aliasAnalysis) {
          return true
        }
      }
    }
    fatalError("dealloc_stack expected to be in same block as `afterInst`")
  }

  /// Returns true if this alloc_stack has a single `dealloc_stack` which is located in the same
  /// block as `inst`.
  func isDeallocatedInSameBlock(as inst: Instruction) -> Bool {
    guard let deallocStack = uses.users(ofType: DeallocStackInst.self).singleElement else {
      return false
    }
    return deallocStack.parentBlock == inst.parentBlock
  }
}

SwiftCompilerSources/Sources/Optimizer/PassManager/PassRegistration.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ private func registerSwiftPasses() {
102102
registerPass(copyToBorrowOptimization, { copyToBorrowOptimization.run($0) })
103103
registerPass(tempRValueElimination, { tempRValueElimination.run($0) })
104104
registerPass(mandatoryTempRValueElimination, { mandatoryTempRValueElimination.run($0) })
105+
registerPass(tempLValueElimination, { tempLValueElimination.run($0) })
105106
registerPass(generalClosureSpecialization, { generalClosureSpecialization.run($0) })
106107
registerPass(autodiffClosureSpecialization, { autodiffClosureSpecialization.run($0) })
107108

include/swift/SILOptimizer/PassManager/Passes.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ PASS(MandatoryTempRValueElimination, "mandatory-temp-rvalue-elimination",
144144
"Mandatory remove short-lived immutable temporary copies")
145145
PASS(TempRValueElimination, "temp-rvalue-elimination",
146146
"Remove short-lived immutable temporary copies")
147+
PASS(TempLValueElimination, "temp-lvalue-elimination",
148+
"Remove short-lived immutable temporary l-values")
147149

148150
// NOTE - ExperimentalSwiftBasedClosureSpecialization and AutodiffClosureSpecialization are a WIP
149151
PASS(ExperimentalSwiftBasedClosureSpecialization, "experimental-swift-based-closure-specialization",
@@ -406,8 +408,6 @@ LEGACY_PASS(PerformanceSILLinker, "performance-linker",
406408
"Deserialize all referenced SIL functions")
407409
LEGACY_PASS(RawSILInstLowering, "raw-sil-inst-lowering",
408410
"Lower all raw SIL instructions to canonical equivalents.")
409-
LEGACY_PASS(TempLValueOpt, "temp-lvalue-opt",
410-
"Remove short-lived immutable temporary l-values")
411411
LEGACY_PASS(IRGenPrepare, "irgen-prepare",
412412
"Cleanup SIL in preparation for IRGen")
413413
LEGACY_PASS(SendNonSendable, "send-non-sendable",

lib/SILOptimizer/PassManager/PassPipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ void addFunctionPasses(SILPassPipelinePlan &P,
431431
P.addDCE();
432432

433433
// Optimize copies from a temporary (an "l-value") to a destination.
434-
P.addTempLValueOpt();
434+
P.addTempLValueElimination();
435435

436436
// Split up opaque operations (copy_addr, retain_value, etc.).
437437
P.addLowerAggregateInstrs();

lib/SILOptimizer/Transforms/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,5 @@ target_sources(swiftSILOptimizer PRIVATE
3232
SimplifyCFG.cpp
3333
Sink.cpp
3434
SpeculativeDevirtualizer.cpp
35-
StringOptimization.cpp
36-
TempLValueOpt.cpp)
35+
StringOptimization.cpp)
3736

0 commit comments

Comments
 (0)