@@ -63,21 +63,33 @@ import SIL
63
63
///
64
64
/// The regular redundant-load-elimination pass: runs RLE on all eligible loads.
let redundantLoadElimination = FunctionPass(name: "redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in
  _ = eliminateRedundantLoads(in: function, variant: .regular, context)
}
68
68
69
69
// Early RLE does not touch loads from Arrays. This is important because later array optimizations,
// like ABCOpt, get confused if an array load in a loop is converted to a pattern with a phi argument.
let earlyRedundantLoadElimination = FunctionPass(name: "early-redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in
  _ = eliminateRedundantLoads(in: function, variant: .early, context)
}
75
75
76
- private func eliminateRedundantLoads( in function: Function , ignoreArrays: Bool , _ context: FunctionPassContext ) {
76
/// The mandatory-pipeline flavor of RLE; restricted to loads that are safe to
/// rewrite before debug-info-sensitive lowering (see `isEligibleForElimination`).
let mandatoryRedundantLoadElimination = FunctionPass(name: "mandatory-redundant-load-elimination") {
  (function: Function, context: FunctionPassContext) in
  _ = eliminateRedundantLoads(in: function, variant: .mandatory, context)
}
80
+
81
/// The flavors of redundant load elimination, used to configure `eliminateRedundantLoads`.
enum RedundantLoadEliminationVariant {
  case mandatory, mandatoryInGlobalInit, early, regular
}
77
84
85
+ func eliminateRedundantLoads( in function: Function ,
86
+ variant: RedundantLoadEliminationVariant ,
87
+ _ context: FunctionPassContext ) -> Bool
88
+ {
78
89
// Avoid quadratic complexity by limiting the number of visited instructions.
79
90
// This limit is sufficient for most "real-world" functions, by far.
80
91
var complexityBudget = 50_000
92
+ var changed = false
81
93
82
94
for block in function. blocks. reversed ( ) {
83
95
@@ -89,50 +101,76 @@ private func eliminateRedundantLoads(in function: Function, ignoreArrays: Bool,
89
101
90
102
if let load = inst as? LoadInst {
91
103
if !context. continueWithNextSubpassRun ( for: load) {
92
- return
104
+ return changed
93
105
}
94
- if ignoreArrays,
95
- let nominal = load. type. nominal,
96
- nominal == context. swiftArrayDecl
97
- {
98
- continue
106
+ if complexityBudget < 20 {
107
+ complexityBudget = 20
99
108
}
100
- // Check if the type can be expanded without a significant increase to
101
- // code size.
102
- // We block redundant load elimination because it might increase
103
- // register pressure for large values. Furthermore, this pass also
104
- // splits values into its projections (e.g
105
- // shrinkMemoryLifetimeAndSplit).
106
- if !load. type. shouldExpand ( context) {
107
- continue
109
+ if !load. isEligibleForElimination ( in: variant, context) {
110
+ continue ;
108
111
}
109
- tryEliminate ( load: load, complexityBudget: & complexityBudget, context)
112
+ changed = tryEliminate ( load: load, complexityBudget: & complexityBudget, context) || changed
110
113
}
111
114
}
112
115
}
116
+ return changed
113
117
}
114
118
115
/// Attempts to eliminate a single (fully or partially) redundant `load`.
///
/// Returns true if the function was changed: either the load was replaced by an
/// already available value, or it was split into smaller loads which will be
/// re-visited in subsequent iterations of the caller's loop.
private func tryEliminate(load: LoadInst, complexityBudget: inout Int, _ context: FunctionPassContext) -> Bool {
  switch load.isRedundant(complexityBudget: &complexityBudget, context) {
  case .redundant(let availableValues):
    replace(load: load, with: availableValues, context)
    return true
  case .maybePartiallyRedundant(let subPath):
    // Verify that a partial load at `subPath` would really be redundant, to avoid unnecessary splitting.
    if case .redundant = load.isRedundant(at: subPath, complexityBudget: &complexityBudget, context) {
      // The new individual loads are inserted right before the current load and
      // will be optimized in the following loop iterations.
      return load.trySplit(context)
    }
    return false
  case .notRedundant:
    return false
  }
}
133
138
134
139
private extension LoadInst {
135
140
141
+ func isEligibleForElimination( in variant: RedundantLoadEliminationVariant , _ context: FunctionPassContext ) -> Bool {
142
+ switch variant {
143
+ case . mandatory, . mandatoryInGlobalInit:
144
+ if loadOwnership == . take {
145
+ // load [take] would require to shrinkMemoryLifetime. But we don't want to do this in the mandatory
146
+ // pipeline to not shrink or remove an alloc_stack which is relevant for debug info.
147
+ return false
148
+ }
149
+ switch address. accessBase {
150
+ case . box, . stack:
151
+ break
152
+ default :
153
+ return false
154
+ }
155
+ case . early:
156
+ // See the comment of `earlyRedundantLoadElimination`.
157
+ if let nominal = self . type. nominal, nominal == context. swiftArrayDecl {
158
+ return false
159
+ }
160
+ case . regular:
161
+ break
162
+ }
163
+ // Check if the type can be expanded without a significant increase to code size.
164
+ // We block redundant load elimination because it might increase register pressure for large values.
165
+ // Furthermore, this pass also splits values into its projections (e.g shrinkMemoryLifetimeAndSplit).
166
+ // But: it is required to remove loads, even of large structs, in global init functions to ensure
167
+ // that globals (containing large structs) can be statically initialized.
168
+ if variant != . mandatoryInGlobalInit, !self . type. shouldExpand ( context) {
169
+ return false
170
+ }
171
+ return true
172
+ }
173
+
136
174
enum DataflowResult {
137
175
case notRedundant
138
176
case redundant( [ AvailableValue ] )
@@ -251,7 +289,7 @@ private func replace(load: LoadInst, with availableValues: [AvailableValue], _ c
251
289
var ssaUpdater = SSAUpdater ( function: load. parentFunction,
252
290
type: load. type, ownership: load. ownership, context)
253
291
254
- for availableValue in availableValues {
292
+ for availableValue in availableValues. replaceCopyAddrsWithLoadsAndStores ( context ) {
255
293
let block = availableValue. instruction. parentBlock
256
294
let availableValue = provideValue ( for: load, from: availableValue, context)
257
295
ssaUpdater. addAvailableValue ( availableValue, in: block)
@@ -279,6 +317,10 @@ private func replace(load: LoadInst, with availableValues: [AvailableValue], _ c
279
317
//
280
318
newValue = ssaUpdater. getValue ( inMiddleOf: load. parentBlock)
281
319
}
320
+
321
+ // Make sure to keep dependencies valid after replacing the load
322
+ insertMarkDependencies ( for: load, context)
323
+
282
324
load. replace ( with: newValue, context)
283
325
}
284
326
@@ -306,6 +348,39 @@ private func provideValue(
306
348
}
307
349
}
308
350
351
/// If the memory location depends on something, insert a dependency for the loaded value:
///
///   %2 = mark_dependence %1 on %0
///   %3 = load %2
/// ->
///   %2 = mark_dependence %1 on %0 // not needed anymore, can be removed eventually
///   %3 = load %2
///   %4 = mark_dependence %3 on %0
///   // replace %3 with %4
///
private func insertMarkDependencies(for load: LoadInst, _ context: FunctionPassContext) {
  var walker = MarkDependenceInserter(load: load, context: context)
  _ = walker.walkUp(address: load.address, path: UnusedWalkingPath())
}
365
+
366
/// Walks up the address def-use chain of a load; for every `mark_dependence`
/// found on the way, it re-creates the dependency directly on the loaded value
/// (see `insertMarkDependencies`).
private struct MarkDependenceInserter: AddressUseDefWalker {
  let load: LoadInst
  let context: FunctionPassContext

  mutating func walkUp(address: Value, path: UnusedWalkingPath) -> WalkResult {
    if let markDep = address as? MarkDependenceInst {
      // Insert the replacement dependence right after the load and reroute all of
      // the load's uses (except the new mark_dependence itself) to it.
      let builder = Builder(after: load, context)
      let replacement = builder.createMarkDependence(value: load, base: markDep.base, kind: markDep.dependenceKind)
      load.uses.ignore(user: replacement).replaceAll(with: replacement, context)
    }
    return walkUpDefault(address: address, path: path)
  }

  mutating func rootDef(address: Value, path: UnusedWalkingPath) -> WalkResult {
    return .continueWalk
  }
}
383
+
309
384
/// In case of a `load [take]` shrink lifetime of the value in memory back to the `availableValue`
310
385
/// and return the (possibly projected) available value. For example:
311
386
///
@@ -342,6 +417,8 @@ private func shrinkMemoryLifetime(from load: LoadInst, to availableValue: Availa
342
417
fatalError ( " unqualified store in ossa function? " )
343
418
}
344
419
return valueToAdd
420
+ case . viaCopyAddr:
421
+ fatalError ( " copy_addr must be lowered before shrinking lifetime " )
345
422
}
346
423
}
347
424
@@ -380,39 +457,58 @@ private func shrinkMemoryLifetimeAndSplit(from load: LoadInst, to availableValue
380
457
let valueToAdd = builder. createLoad ( fromAddress: addr, ownership: . take)
381
458
availableStore. trySplit ( context)
382
459
return valueToAdd
460
+ case . viaCopyAddr:
461
+ fatalError ( " copy_addr must be lowered before shrinking lifetime " )
383
462
}
384
463
}
385
464
386
465
/// Either a `load` or `store` which is preceding the original load and provides the loaded value.
private enum AvailableValue {
  case viaLoad(LoadInst)
  case viaStore(StoreInst)
  case viaCopyAddr(CopyAddrInst)

  /// The SSA value that is available. Not defined for `copy_addr`, which must be
  /// rewritten to a load/store pair first (see `replaceCopyAddrsWithLoadsAndStores`).
  var value: Value {
    switch self {
    case let .viaLoad(load):   return load
    case let .viaStore(store): return store.source
    case .viaCopyAddr:         fatalError("copy_addr must be lowered")
    }
  }

  /// The memory address at which the value is available.
  var address: Value {
    switch self {
    case let .viaLoad(load):         return load.address
    case let .viaStore(store):       return store.destination
    case let .viaCopyAddr(copyAddr): return copyAddr.destination
    }
  }

  /// The instruction that makes the value available.
  var instruction: Instruction {
    switch self {
    case let .viaLoad(load):         return load
    case let .viaStore(store):       return store
    case let .viaCopyAddr(copyAddr): return copyAddr
    }
  }

  /// A builder positioned where projections of this available value can be inserted:
  /// after a providing load, before a providing store.
  func getBuilderForProjections(_ context: FunctionPassContext) -> Builder {
    switch self {
    case let .viaLoad(load):   return Builder(after: load, context)
    case let .viaStore(store): return Builder(before: store, context)
    case .viaCopyAddr:         fatalError("copy_addr must be lowered")
    }
  }
}
503
+
504
private extension Array where Element == AvailableValue {
  /// Lowers every `copy_addr` entry to an equivalent load + store and returns the
  /// resulting list of available values; non-copy_addr entries are passed through unchanged.
  func replaceCopyAddrsWithLoadsAndStores(_ context: FunctionPassContext) -> [AvailableValue] {
    return map { availableValue in
      guard case .viaCopyAddr(let copyAddr) = availableValue else {
        return availableValue
      }
      return .viaStore(copyAddr.replaceWithLoadAndStore(context))
    }
  }
}
@@ -479,7 +575,7 @@ private struct InstructionScanner {
479
575
480
576
private mutating func visit( instruction: Instruction ) -> ScanResult {
481
577
switch instruction {
482
- case is FixLifetimeInst , is EndAccessInst , is EndBorrowInst :
578
+ case is FixLifetimeInst , is BeginAccessInst , is EndAccessInst , is EndBorrowInst :
483
579
// Those scope-ending instructions are only irrelevant if the preceding load is not changed.
484
580
// If it is changed from `load [copy]` -> `load [take]` the memory effects of those scope-ending
485
581
// instructions prevent that the `load [take]` will illegally mutate memory which is protected
@@ -520,6 +616,16 @@ private struct InstructionScanner {
520
616
potentiallyRedundantSubpath = precedingStorePath
521
617
}
522
618
619
+ case let preceedingCopy as CopyAddrInst where preceedingCopy. canProvideValue:
620
+ let copyPath = preceedingCopy. destination. constantAccessPath
621
+ if copyPath. getMaterializableProjection ( to: accessPath) != nil {
622
+ availableValues. append ( . viaCopyAddr( preceedingCopy) )
623
+ return . available
624
+ }
625
+ if accessPath. getMaterializableProjection ( to: copyPath) != nil , potentiallyRedundantSubpath == nil {
626
+ potentiallyRedundantSubpath = copyPath
627
+ }
628
+
523
629
default :
524
630
break
525
631
}
@@ -606,3 +712,20 @@ private struct Liverange {
606
712
return false
607
713
}
608
714
}
715
+
716
private extension CopyAddrInst {
  /// True if this `copy_addr` can serve as an available value for redundant load elimination.
  var canProvideValue: Bool {
    guard source.type.isLoadable(in: parentFunction) else {
      // Although the original load's type is loadable (obviously), it can be projected-out
      // from the copy_addr's type which might be not loadable.
      return false
    }
    if !parentFunction.hasOwnership, !(isTakeOfSrc && isInitializationOfDest) {
      // For simplicity, bail if we would have to insert compensating retains and releases.
      return false
    }
    return true
  }
}
0 commit comments