@@ -223,6 +223,8 @@ extern cl::opt<bool> EnableMatrix;
223
223
224
224
extern cl::opt<bool > DisablePreInliner;
225
225
extern cl::opt<int > PreInlineThreshold;
226
+
227
+ extern cl::opt<bool > SYCLOptimizationMode;
226
228
} // namespace llvm
227
229
228
230
void PassBuilder::invokePeepholeEPCallbacks (FunctionPassManager &FPM,
@@ -271,78 +273,88 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
271
273
// Form canonically associated expression trees, and simplify the trees using
272
274
// basic mathematical properties. For example, this will form (nearly)
273
275
// minimal multiplication trees.
274
- FPM.addPass (ReassociatePass ());
275
-
276
- // Add the primary loop simplification pipeline.
277
- // FIXME: Currently this is split into two loop pass pipelines because we run
278
- // some function passes in between them. These can and should be removed
279
- // and/or replaced by scheduling the loop pass equivalents in the correct
280
- // positions. But those equivalent passes aren't powerful enough yet.
281
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
282
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
283
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
284
- // `LoopInstSimplify`.
285
- LoopPassManager LPM1, LPM2;
286
-
287
- // Simplify the loop body. We do this initially to clean up after other loop
288
- // passes run, either when iterating on a loop or on inner loops with
289
- // implications on the outer loop.
290
- LPM1.addPass (LoopInstSimplifyPass ());
291
- LPM1.addPass (LoopSimplifyCFGPass ());
292
-
293
- // Try to remove as much code from the loop header as possible,
294
- // to reduce amount of IR that will have to be duplicated.
295
- // TODO: Investigate promotion cap for O1.
296
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
297
-
298
- LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
299
- isLTOPreLink (Phase)));
300
- // TODO: Investigate promotion cap for O1.
301
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
302
- LPM1.addPass (SimpleLoopUnswitchPass ());
303
-
304
- LPM2.addPass (LoopIdiomRecognizePass ());
305
- LPM2.addPass (IndVarSimplifyPass ());
306
-
307
- for (auto &C : LateLoopOptimizationsEPCallbacks)
308
- C (LPM2, Level);
309
-
310
- LPM2.addPass (LoopDeletionPass ());
311
-
312
- if (EnableLoopInterchange)
313
- LPM2.addPass (LoopInterchangePass ());
314
-
315
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
316
- // because it changes IR to makes profile annotation in back compile
317
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
318
- // attributes so we need to make sure and allow the full unroll pass to pay
319
- // attention to it.
320
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
321
- PGOOpt->Action != PGOOptions::SampleUse)
322
- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
323
- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
324
- PTO.ForgetAllSCEVInLoopUnroll ));
325
-
326
- for (auto &C : LoopOptimizerEndEPCallbacks)
327
- C (LPM2, Level);
328
-
329
- // We provide the opt remark emitter pass for LICM to use. We only need to do
330
- // this once as it is immutable.
331
- FPM.addPass (
332
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
333
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
334
- /* UseMemorySSA=*/ true ,
335
- /* UseBlockFrequencyInfo=*/ true ));
336
- FPM.addPass (SimplifyCFGPass ());
337
- FPM.addPass (InstCombinePass ());
338
- if (EnableLoopFlatten)
339
- FPM.addPass (createFunctionToLoopPassAdaptor (LoopFlattenPass ()));
340
- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
341
- // *All* loop passes must preserve it, in order to be able to use it.
342
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
343
- /* UseMemorySSA=*/ false ,
344
- /* UseBlockFrequencyInfo=*/ false ));
276
+ if (!SYCLOptimizationMode) {
277
+ // FIXME: re-association increases variable liveness and therefore register
278
+ // pressure.
279
+ FPM.addPass (ReassociatePass ());
280
+
281
+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
282
+ // optimizations rely on TTI, which is not accurate for SPIR target.
283
+
284
+ // Add the primary loop simplification pipeline.
285
+ // FIXME: Currently this is split into two loop pass pipelines because we
286
+ // run some function passes in between them. These can and should be removed
287
+ // and/or replaced by scheduling the loop pass equivalents in the correct
288
+ // positions. But those equivalent passes aren't powerful enough yet.
289
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
290
+ // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet
291
+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
292
+ // is `LoopInstSimplify`.
293
+ LoopPassManager LPM1, LPM2;
294
+
295
+ // Simplify the loop body. We do this initially to clean up after other loop
296
+ // passes run, either when iterating on a loop or on inner loops with
297
+ // implications on the outer loop.
298
+ LPM1.addPass (LoopInstSimplifyPass ());
299
+ LPM1.addPass (LoopSimplifyCFGPass ());
300
+
301
+ // Try to remove as much code from the loop header as possible,
302
+ // to reduce amount of IR that will have to be duplicated.
303
+ // TODO: Investigate promotion cap for O1.
304
+ LPM1.addPass (
305
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
306
+
307
+ LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
308
+ isLTOPreLink (Phase)));
309
+ // TODO: Investigate promotion cap for O1.
310
+ LPM1.addPass (
311
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
312
+ LPM1.addPass (SimpleLoopUnswitchPass ());
313
+
314
+ LPM2.addPass (LoopIdiomRecognizePass ());
315
+ LPM2.addPass (IndVarSimplifyPass ());
316
+
317
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
318
+ C (LPM2, Level);
345
319
320
+ LPM2.addPass (LoopDeletionPass ());
321
+
322
+ if (EnableLoopInterchange)
323
+ LPM2.addPass (LoopInterchangePass ());
324
+
325
+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
326
+ // because it changes IR and makes profile annotation in back compile
327
+ // inaccurate. The normal unroller doesn't pay attention to forced full
328
+ // unroll attributes so we need to make sure and allow the full unroll pass
329
+ // to pay attention to it.
330
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
331
+ PGOOpt->Action != PGOOptions::SampleUse)
332
+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
333
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
334
+ PTO.ForgetAllSCEVInLoopUnroll ));
335
+
336
+ for (auto &C : LoopOptimizerEndEPCallbacks)
337
+ C (LPM2, Level);
338
+
339
+ // We provide the opt remark emitter pass for LICM to use. We only need to
340
+ // do this once as it is immutable.
341
+ FPM.addPass (
342
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
343
+ FPM.addPass (
344
+ createFunctionToLoopPassAdaptor (std::move (LPM1),
345
+ /* UseMemorySSA=*/ true ,
346
+ /* UseBlockFrequencyInfo=*/ true ));
347
+ FPM.addPass (SimplifyCFGPass ());
348
+ FPM.addPass (InstCombinePass ());
349
+ if (EnableLoopFlatten)
350
+ FPM.addPass (createFunctionToLoopPassAdaptor (LoopFlattenPass ()));
351
+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
352
+ // *All* loop passes must preserve it, in order to be able to use it.
353
+ FPM.addPass (
354
+ createFunctionToLoopPassAdaptor (std::move (LPM2),
355
+ /* UseMemorySSA=*/ false ,
356
+ /* UseBlockFrequencyInfo=*/ false ));
357
+ }
346
358
// Delete small array after loop unroll.
347
359
FPM.addPass (SROAPass ());
348
360
@@ -443,80 +455,91 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
443
455
// Form canonically associated expression trees, and simplify the trees using
444
456
// basic mathematical properties. For example, this will form (nearly)
445
457
// minimal multiplication trees.
446
- FPM.addPass (ReassociatePass ());
447
-
448
- // Add the primary loop simplification pipeline.
449
- // FIXME: Currently this is split into two loop pass pipelines because we run
450
- // some function passes in between them. These can and should be removed
451
- // and/or replaced by scheduling the loop pass equivalents in the correct
452
- // positions. But those equivalent passes aren't powerful enough yet.
453
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
454
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
455
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
456
- // `LoopInstSimplify`.
457
- LoopPassManager LPM1, LPM2;
458
-
459
- // Simplify the loop body. We do this initially to clean up after other loop
460
- // passes run, either when iterating on a loop or on inner loops with
461
- // implications on the outer loop.
462
- LPM1.addPass (LoopInstSimplifyPass ());
463
- LPM1.addPass (LoopSimplifyCFGPass ());
464
-
465
- // Try to remove as much code from the loop header as possible,
466
- // to reduce amount of IR that will have to be duplicated.
467
- // TODO: Investigate promotion cap for O1.
468
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
458
+ if (!SYCLOptimizationMode) {
459
+ // FIXME: re-association increases variable liveness and therefore register
460
+ // pressure.
461
+ FPM.addPass (ReassociatePass ());
462
+
463
+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
464
+ // optimizations rely on TTI, which is not accurate for SPIR target.
465
+
466
+ // Add the primary loop simplification pipeline.
467
+ // FIXME: Currently this is split into two loop pass pipelines because we
468
+ // run some function passes in between them. These can and should be removed
469
+ // and/or replaced by scheduling the loop pass equivalents in the correct
470
+ // positions. But those equivalent passes aren't powerful enough yet.
471
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
472
+ // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet
473
+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
474
+ // is `LoopInstSimplify`.
475
+ LoopPassManager LPM1, LPM2;
476
+
477
+ // Simplify the loop body. We do this initially to clean up after other loop
478
+ // passes run, either when iterating on a loop or on inner loops with
479
+ // implications on the outer loop.
480
+ LPM1.addPass (LoopInstSimplifyPass ());
481
+ LPM1.addPass (LoopSimplifyCFGPass ());
482
+
483
+ // Try to remove as much code from the loop header as possible,
484
+ // to reduce amount of IR that will have to be duplicated.
485
+ // TODO: Investigate promotion cap for O1.
486
+ LPM1.addPass (
487
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
488
+
489
+ // Disable header duplication in loop rotation at -Oz.
490
+ LPM1.addPass (
491
+ LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
492
+ // TODO: Investigate promotion cap for O1.
493
+ LPM1.addPass (
494
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
495
+ LPM1.addPass (SimpleLoopUnswitchPass (/* NonTrivial */ Level ==
496
+ OptimizationLevel::O3 &&
497
+ EnableO3NonTrivialUnswitching));
498
+ LPM2.addPass (LoopIdiomRecognizePass ());
499
+ LPM2.addPass (IndVarSimplifyPass ());
469
500
470
- // Disable header duplication in loop rotation at -Oz.
471
- LPM1.addPass (
472
- LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
473
- // TODO: Investigate promotion cap for O1.
474
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
475
- LPM1.addPass (
476
- SimpleLoopUnswitchPass (/* NonTrivial */ Level == OptimizationLevel::O3 &&
477
- EnableO3NonTrivialUnswitching));
478
- LPM2.addPass (LoopIdiomRecognizePass ());
479
- LPM2.addPass (IndVarSimplifyPass ());
480
-
481
- for (auto &C : LateLoopOptimizationsEPCallbacks)
482
- C (LPM2, Level);
483
-
484
- LPM2.addPass (LoopDeletionPass ());
485
-
486
- if (EnableLoopInterchange)
487
- LPM2.addPass (LoopInterchangePass ());
488
-
489
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
490
- // because it changes IR to makes profile annotation in back compile
491
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
492
- // attributes so we need to make sure and allow the full unroll pass to pay
493
- // attention to it.
494
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
495
- PGOOpt->Action != PGOOptions::SampleUse)
496
- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
497
- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
498
- PTO.ForgetAllSCEVInLoopUnroll ));
499
-
500
- for (auto &C : LoopOptimizerEndEPCallbacks)
501
- C (LPM2, Level);
502
-
503
- // We provide the opt remark emitter pass for LICM to use. We only need to do
504
- // this once as it is immutable.
505
- FPM.addPass (
506
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
507
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
508
- /* UseMemorySSA=*/ true ,
509
- /* UseBlockFrequencyInfo=*/ true ));
510
- FPM.addPass (SimplifyCFGPass ());
511
- FPM.addPass (InstCombinePass ());
512
- if (EnableLoopFlatten)
513
- FPM.addPass (createFunctionToLoopPassAdaptor (LoopFlattenPass ()));
514
- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
515
- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
516
- // *All* loop passes must preserve it, in order to be able to use it.
517
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
518
- /* UseMemorySSA=*/ false ,
519
- /* UseBlockFrequencyInfo=*/ false ));
501
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
502
+ C (LPM2, Level);
503
+
504
+ LPM2.addPass (LoopDeletionPass ());
505
+
506
+ if (EnableLoopInterchange)
507
+ LPM2.addPass (LoopInterchangePass ());
508
+
509
+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
510
+ // because it changes IR and makes profile annotation in back compile
511
+ // inaccurate. The normal unroller doesn't pay attention to forced full
512
+ // unroll attributes so we need to make sure and allow the full unroll pass
513
+ // to pay attention to it.
514
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
515
+ PGOOpt->Action != PGOOptions::SampleUse)
516
+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
517
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
518
+ PTO.ForgetAllSCEVInLoopUnroll ));
519
+
520
+ for (auto &C : LoopOptimizerEndEPCallbacks)
521
+ C (LPM2, Level);
522
+
523
+ // We provide the opt remark emitter pass for LICM to use. We only need to
524
+ // do this once as it is immutable.
525
+ FPM.addPass (
526
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
527
+ FPM.addPass (
528
+ createFunctionToLoopPassAdaptor (std::move (LPM1),
529
+ /* UseMemorySSA=*/ true ,
530
+ /* UseBlockFrequencyInfo=*/ true ));
531
+ FPM.addPass (SimplifyCFGPass ());
532
+ FPM.addPass (InstCombinePass ());
533
+ if (EnableLoopFlatten)
534
+ FPM.addPass (createFunctionToLoopPassAdaptor (LoopFlattenPass ()));
535
+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
536
+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
537
+ // *All* loop passes must preserve it, in order to be able to use it.
538
+ FPM.addPass (
539
+ createFunctionToLoopPassAdaptor (std::move (LPM2),
540
+ /* UseMemorySSA=*/ false ,
541
+ /* UseBlockFrequencyInfo=*/ false ));
542
+ }
520
543
521
544
// Delete small array after loop unroll.
522
545
FPM.addPass (SROAPass ());
@@ -1161,29 +1184,32 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1161
1184
for (auto &C : VectorizerStartEPCallbacks)
1162
1185
C (OptimizePM, Level);
1163
1186
1164
- LoopPassManager LPM;
1165
- // First rotate loops that may have been un-rotated by prior passes.
1166
- // Disable header duplication at -Oz.
1167
- LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1168
- // Some loops may have become dead by now. Try to delete them.
1169
- // FIXME: see discussion in https://reviews.llvm.org/D112851,
1170
- // this may need to be revisited once we run GVN before loop deletion
1171
- // in the simplification pipeline.
1172
- LPM.addPass (LoopDeletionPass ());
1173
- OptimizePM.addPass (createFunctionToLoopPassAdaptor (
1174
- std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
1175
-
1176
- // Distribute loops to allow partial vectorization. I.e. isolate dependences
1177
- // into separate loop that would otherwise inhibit vectorization. This is
1178
- // currently only performed for loops marked with the metadata
1179
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1180
- OptimizePM.addPass (LoopDistributePass ());
1181
-
1182
- // Populates the VFABI attribute with the scalar-to-vector mappings
1183
- // from the TargetLibraryInfo.
1184
- OptimizePM.addPass (InjectTLIMappings ());
1185
-
1186
- addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1187
+ if (!SYCLOptimizationMode) {
1188
+ LoopPassManager LPM;
1189
+ // First rotate loops that may have been un-rotated by prior passes.
1190
+ // Disable header duplication at -Oz.
1191
+ LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1192
+ // Some loops may have become dead by now. Try to delete them.
1193
+ // FIXME: see discussion in https://reviews.llvm.org/D112851,
1194
+ // this may need to be revisited once we run GVN before loop deletion
1195
+ // in the simplification pipeline.
1196
+ LPM.addPass (LoopDeletionPass ());
1197
+ OptimizePM.addPass (
1198
+ createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA=*/ false ,
1199
+ /* UseBlockFrequencyInfo=*/ false ));
1200
+
1201
+ // Distribute loops to allow partial vectorization. I.e. isolate dependences
1202
+ // into separate loop that would otherwise inhibit vectorization. This is
1203
+ // currently only performed for loops marked with the metadata
1204
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1205
+ OptimizePM.addPass (LoopDistributePass ());
1206
+
1207
+ // Populates the VFABI attribute with the scalar-to-vector mappings
1208
+ // from the TargetLibraryInfo.
1209
+ OptimizePM.addPass (InjectTLIMappings ());
1210
+
1211
+ addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1212
+ }
1187
1213
1188
1214
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
1189
1215
// canonicalization pass that enables other optimizations. As a result,
0 commit comments