@@ -223,6 +223,8 @@ extern cl::opt<bool> EnableMatrix;
223
223
224
224
extern cl::opt<bool > DisablePreInliner;
225
225
extern cl::opt<int > PreInlineThreshold;
226
+
227
+ extern cl::opt<bool > SYCLOptimizationMode;
226
228
} // namespace llvm
227
229
228
230
void PassBuilder::invokePeepholeEPCallbacks (FunctionPassManager &FPM,
@@ -271,78 +273,88 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
271
273
// Form canonically associated expression trees, and simplify the trees using
272
274
// basic mathematical properties. For example, this will form (nearly)
273
275
// minimal multiplication trees.
274
- FPM.addPass (ReassociatePass ());
275
-
276
- // Add the primary loop simplification pipeline.
277
- // FIXME: Currently this is split into two loop pass pipelines because we run
278
- // some function passes in between them. These can and should be removed
279
- // and/or replaced by scheduling the loop pass equivalents in the correct
280
- // positions. But those equivalent passes aren't powerful enough yet.
281
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
282
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
283
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
284
- // `LoopInstSimplify`.
285
- LoopPassManager LPM1, LPM2;
286
-
287
- // Simplify the loop body. We do this initially to clean up after other loop
288
- // passes run, either when iterating on a loop or on inner loops with
289
- // implications on the outer loop.
290
- LPM1.addPass (LoopInstSimplifyPass ());
291
- LPM1.addPass (LoopSimplifyCFGPass ());
292
-
293
- // Try to remove as much code from the loop header as possible,
294
- // to reduce amount of IR that will have to be duplicated.
295
- // TODO: Investigate promotion cap for O1.
296
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
297
-
298
- LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
299
- isLTOPreLink (Phase)));
300
- // TODO: Investigate promotion cap for O1.
301
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
302
- LPM1.addPass (SimpleLoopUnswitchPass ());
303
- if (EnableLoopFlatten)
304
- LPM1.addPass (LoopFlattenPass ());
305
-
306
- LPM2.addPass (LoopIdiomRecognizePass ());
307
- LPM2.addPass (IndVarSimplifyPass ());
308
-
309
- for (auto &C : LateLoopOptimizationsEPCallbacks)
310
- C (LPM2, Level);
311
-
312
- LPM2.addPass (LoopDeletionPass ());
313
-
314
- if (EnableLoopInterchange)
315
- LPM2.addPass (LoopInterchangePass ());
316
-
317
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
318
- // because it changes IR to makes profile annotation in back compile
319
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
320
- // attributes so we need to make sure and allow the full unroll pass to pay
321
- // attention to it.
322
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
323
- PGOOpt->Action != PGOOptions::SampleUse)
324
- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
325
- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
326
- PTO.ForgetAllSCEVInLoopUnroll ));
327
-
328
- for (auto &C : LoopOptimizerEndEPCallbacks)
329
- C (LPM2, Level);
330
-
331
- // We provide the opt remark emitter pass for LICM to use. We only need to do
332
- // this once as it is immutable.
333
- FPM.addPass (
334
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
335
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
336
- /* UseMemorySSA=*/ true ,
337
- /* UseBlockFrequencyInfo=*/ true ));
338
- FPM.addPass (SimplifyCFGPass ());
339
- FPM.addPass (InstCombinePass ());
340
- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
341
- // *All* loop passes must preserve it, in order to be able to use it.
342
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
343
- /* UseMemorySSA=*/ false ,
344
- /* UseBlockFrequencyInfo=*/ false ));
276
+ if (!SYCLOptimizationMode) {
277
+ // FIXME: re-association increases variable liveness and therefore register
278
+ // pressure.
279
+ FPM.addPass (ReassociatePass ());
280
+
281
+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
282
+ // optimizations rely on TTI, which is not accurate for SPIR target.
283
+
284
+ // Add the primary loop simplification pipeline.
285
+ // FIXME: Currently this is split into two loop pass pipelines because we
286
+ // run some function passes in between them. These can and should be removed
287
+ // and/or replaced by scheduling the loop pass equivalents in the correct
288
+ // positions. But those equivalent passes aren't powerful enough yet.
289
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
290
+ // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet
291
+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
292
+ // is `LoopInstSimplify`.
293
+ LoopPassManager LPM1, LPM2;
294
+
295
+ // Simplify the loop body. We do this initially to clean up after other loop
296
+ // passes run, either when iterating on a loop or on inner loops with
297
+ // implications on the outer loop.
298
+ LPM1.addPass (LoopInstSimplifyPass ());
299
+ LPM1.addPass (LoopSimplifyCFGPass ());
300
+
301
+ // Try to remove as much code from the loop header as possible,
302
+ // to reduce amount of IR that will have to be duplicated.
303
+ // TODO: Investigate promotion cap for O1.
304
+ LPM1.addPass (
305
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
306
+
307
+ LPM1.addPass (LoopRotatePass (/* Disable header duplication */ true ,
308
+ isLTOPreLink (Phase)));
309
+ // TODO: Investigate promotion cap for O1.
310
+ LPM1.addPass (
311
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
312
+ LPM1.addPass (SimpleLoopUnswitchPass ());
313
+ if (EnableLoopFlatten)
314
+ LPM1.addPass (LoopFlattenPass ());
315
+
316
+ LPM2.addPass (LoopIdiomRecognizePass ());
317
+ LPM2.addPass (IndVarSimplifyPass ());
318
+
319
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
320
+ C (LPM2, Level);
345
321
322
+ LPM2.addPass (LoopDeletionPass ());
323
+
324
+ if (EnableLoopInterchange)
325
+ LPM2.addPass (LoopInterchangePass ());
326
+
327
+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
328
+ // because it changes IR, which makes profile annotation in back compile
329
+ // inaccurate. The normal unroller doesn't pay attention to forced full
330
+ // unroll attributes so we need to make sure and allow the full unroll pass
331
+ // to pay attention to it.
332
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
333
+ PGOOpt->Action != PGOOptions::SampleUse)
334
+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
335
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
336
+ PTO.ForgetAllSCEVInLoopUnroll ));
337
+
338
+ for (auto &C : LoopOptimizerEndEPCallbacks)
339
+ C (LPM2, Level);
340
+
341
+ // We provide the opt remark emitter pass for LICM to use. We only need to
342
+ // do this once as it is immutable.
343
+ FPM.addPass (
344
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
345
+ FPM.addPass (
346
+ createFunctionToLoopPassAdaptor (std::move (LPM1),
347
+ /* UseMemorySSA=*/ true ,
348
+ /* UseBlockFrequencyInfo=*/ true ));
349
+ FPM.addPass (SimplifyCFGPass ());
350
+ FPM.addPass (InstCombinePass ());
351
+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
352
+ // *All* loop passes must preserve it, in order to be able to use it.
353
+ FPM.addPass (
354
+ createFunctionToLoopPassAdaptor (std::move (LPM2),
355
+ /* UseMemorySSA=*/ false ,
356
+ /* UseBlockFrequencyInfo=*/ false ));
357
+ }
346
358
// Delete small array after loop unroll.
347
359
FPM.addPass (SROAPass ());
348
360
@@ -443,81 +455,92 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
443
455
// Form canonically associated expression trees, and simplify the trees using
444
456
// basic mathematical properties. For example, this will form (nearly)
445
457
// minimal multiplication trees.
446
- FPM.addPass (ReassociatePass ());
447
-
448
- // Add the primary loop simplification pipeline.
449
- // FIXME: Currently this is split into two loop pass pipelines because we run
450
- // some function passes in between them. These can and should be removed
451
- // and/or replaced by scheduling the loop pass equivalents in the correct
452
- // positions. But those equivalent passes aren't powerful enough yet.
453
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
454
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
455
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
456
- // `LoopInstSimplify`.
457
- LoopPassManager LPM1, LPM2;
458
-
459
- // Simplify the loop body. We do this initially to clean up after other loop
460
- // passes run, either when iterating on a loop or on inner loops with
461
- // implications on the outer loop.
462
- LPM1.addPass (LoopInstSimplifyPass ());
463
- LPM1.addPass (LoopSimplifyCFGPass ());
464
-
465
- // Try to remove as much code from the loop header as possible,
466
- // to reduce amount of IR that will have to be duplicated.
467
- // TODO: Investigate promotion cap for O1.
468
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
458
+ if (!SYCLOptimizationMode) {
459
+ // FIXME: re-association increases variable liveness and therefore register
460
+ // pressure.
461
+ FPM.addPass (ReassociatePass ());
462
+
463
+ // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
464
+ // optimizations rely on TTI, which is not accurate for SPIR target.
465
+
466
+ // Add the primary loop simplification pipeline.
467
+ // FIXME: Currently this is split into two loop pass pipelines because we
468
+ // run some function passes in between them. These can and should be removed
469
+ // and/or replaced by scheduling the loop pass equivalents in the correct
470
+ // positions. But those equivalent passes aren't powerful enough yet.
471
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
472
+ // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet
473
+ // to fully replace `SimplifyCFGPass`, and the closest to the other we have
474
+ // is `LoopInstSimplify`.
475
+ LoopPassManager LPM1, LPM2;
476
+
477
+ // Simplify the loop body. We do this initially to clean up after other loop
478
+ // passes run, either when iterating on a loop or on inner loops with
479
+ // implications on the outer loop.
480
+ LPM1.addPass (LoopInstSimplifyPass ());
481
+ LPM1.addPass (LoopSimplifyCFGPass ());
482
+
483
+ // Try to remove as much code from the loop header as possible,
484
+ // to reduce amount of IR that will have to be duplicated.
485
+ // TODO: Investigate promotion cap for O1.
486
+ LPM1.addPass (
487
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
488
+
489
+ // Disable header duplication in loop rotation at -Oz.
490
+ LPM1.addPass (
491
+ LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
492
+ // TODO: Investigate promotion cap for O1.
493
+ LPM1.addPass (
494
+ LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
495
+ LPM1.addPass (SimpleLoopUnswitchPass (/* NonTrivial */ Level ==
496
+ OptimizationLevel::O3 &&
497
+ EnableO3NonTrivialUnswitching));
498
+ if (EnableLoopFlatten)
499
+ LPM1.addPass (LoopFlattenPass ());
500
+
501
+ LPM2.addPass (LoopIdiomRecognizePass ());
502
+ LPM2.addPass (IndVarSimplifyPass ());
469
503
470
- // Disable header duplication in loop rotation at -Oz.
471
- LPM1.addPass (
472
- LoopRotatePass (Level != OptimizationLevel::Oz, isLTOPreLink (Phase)));
473
- // TODO: Investigate promotion cap for O1.
474
- LPM1.addPass (LICMPass (PTO.LicmMssaOptCap , PTO.LicmMssaNoAccForPromotionCap ));
475
- LPM1.addPass (
476
- SimpleLoopUnswitchPass (/* NonTrivial */ Level == OptimizationLevel::O3 &&
477
- EnableO3NonTrivialUnswitching));
478
- if (EnableLoopFlatten)
479
- LPM1.addPass (LoopFlattenPass ());
480
-
481
- LPM2.addPass (LoopIdiomRecognizePass ());
482
- LPM2.addPass (IndVarSimplifyPass ());
483
-
484
- for (auto &C : LateLoopOptimizationsEPCallbacks)
485
- C (LPM2, Level);
486
-
487
- LPM2.addPass (LoopDeletionPass ());
488
-
489
- if (EnableLoopInterchange)
490
- LPM2.addPass (LoopInterchangePass ());
491
-
492
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
493
- // because it changes IR to makes profile annotation in back compile
494
- // inaccurate. The normal unroller doesn't pay attention to forced full unroll
495
- // attributes so we need to make sure and allow the full unroll pass to pay
496
- // attention to it.
497
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
498
- PGOOpt->Action != PGOOptions::SampleUse)
499
- LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
500
- /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
501
- PTO.ForgetAllSCEVInLoopUnroll ));
502
-
503
- for (auto &C : LoopOptimizerEndEPCallbacks)
504
- C (LPM2, Level);
505
-
506
- // We provide the opt remark emitter pass for LICM to use. We only need to do
507
- // this once as it is immutable.
508
- FPM.addPass (
509
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
510
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM1),
511
- /* UseMemorySSA=*/ true ,
512
- /* UseBlockFrequencyInfo=*/ true ));
513
- FPM.addPass (SimplifyCFGPass ());
514
- FPM.addPass (InstCombinePass ());
515
- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
516
- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
517
- // *All* loop passes must preserve it, in order to be able to use it.
518
- FPM.addPass (createFunctionToLoopPassAdaptor (std::move (LPM2),
519
- /* UseMemorySSA=*/ false ,
520
- /* UseBlockFrequencyInfo=*/ false ));
504
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
505
+ C (LPM2, Level);
506
+
507
+ LPM2.addPass (LoopDeletionPass ());
508
+
509
+ if (EnableLoopInterchange)
510
+ LPM2.addPass (LoopInterchangePass ());
511
+
512
+ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
513
+ // because it changes IR, which makes profile annotation in back compile
514
+ // inaccurate. The normal unroller doesn't pay attention to forced full
515
+ // unroll attributes so we need to make sure and allow the full unroll pass
516
+ // to pay attention to it.
517
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
518
+ PGOOpt->Action != PGOOptions::SampleUse)
519
+ LPM2.addPass (LoopFullUnrollPass (Level.getSpeedupLevel (),
520
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling ,
521
+ PTO.ForgetAllSCEVInLoopUnroll ));
522
+
523
+ for (auto &C : LoopOptimizerEndEPCallbacks)
524
+ C (LPM2, Level);
525
+
526
+ // We provide the opt remark emitter pass for LICM to use. We only need to
527
+ // do this once as it is immutable.
528
+ FPM.addPass (
529
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
530
+ FPM.addPass (
531
+ createFunctionToLoopPassAdaptor (std::move (LPM1),
532
+ /* UseMemorySSA=*/ true ,
533
+ /* UseBlockFrequencyInfo=*/ true ));
534
+ FPM.addPass (SimplifyCFGPass ());
535
+ FPM.addPass (InstCombinePass ());
536
+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
537
+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
538
+ // *All* loop passes must preserve it, in order to be able to use it.
539
+ FPM.addPass (
540
+ createFunctionToLoopPassAdaptor (std::move (LPM2),
541
+ /* UseMemorySSA=*/ false ,
542
+ /* UseBlockFrequencyInfo=*/ false ));
543
+ }
521
544
522
545
// Delete small array after loop unroll.
523
546
FPM.addPass (SROAPass ());
@@ -1162,29 +1185,32 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1162
1185
for (auto &C : VectorizerStartEPCallbacks)
1163
1186
C (OptimizePM, Level);
1164
1187
1165
- LoopPassManager LPM;
1166
- // First rotate loops that may have been un-rotated by prior passes.
1167
- // Disable header duplication at -Oz.
1168
- LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1169
- // Some loops may have become dead by now. Try to delete them.
1170
- // FIXME: see discussion in https://reviews.llvm.org/D112851,
1171
- // this may need to be revisited once we run GVN before loop deletion
1172
- // in the simplification pipeline.
1173
- LPM.addPass (LoopDeletionPass ());
1174
- OptimizePM.addPass (createFunctionToLoopPassAdaptor (
1175
- std::move (LPM), /* UseMemorySSA=*/ false , /* UseBlockFrequencyInfo=*/ false ));
1176
-
1177
- // Distribute loops to allow partial vectorization. I.e. isolate dependences
1178
- // into separate loop that would otherwise inhibit vectorization. This is
1179
- // currently only performed for loops marked with the metadata
1180
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1181
- OptimizePM.addPass (LoopDistributePass ());
1182
-
1183
- // Populates the VFABI attribute with the scalar-to-vector mappings
1184
- // from the TargetLibraryInfo.
1185
- OptimizePM.addPass (InjectTLIMappings ());
1186
-
1187
- addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1188
+ if (!SYCLOptimizationMode) {
1189
+ LoopPassManager LPM;
1190
+ // First rotate loops that may have been un-rotated by prior passes.
1191
+ // Disable header duplication at -Oz.
1192
+ LPM.addPass (LoopRotatePass (Level != OptimizationLevel::Oz, LTOPreLink));
1193
+ // Some loops may have become dead by now. Try to delete them.
1194
+ // FIXME: see discussion in https://reviews.llvm.org/D112851,
1195
+ // this may need to be revisited once we run GVN before loop deletion
1196
+ // in the simplification pipeline.
1197
+ LPM.addPass (LoopDeletionPass ());
1198
+ OptimizePM.addPass (
1199
+ createFunctionToLoopPassAdaptor (std::move (LPM), /* UseMemorySSA=*/ false ,
1200
+ /* UseBlockFrequencyInfo=*/ false ));
1201
+
1202
+ // Distribute loops to allow partial vectorization. I.e. isolate dependences
1203
+ // into a separate loop that would otherwise inhibit vectorization. This is
1204
+ // currently only performed for loops marked with the metadata
1205
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1206
+ OptimizePM.addPass (LoopDistributePass ());
1207
+
1208
+ // Populates the VFABI attribute with the scalar-to-vector mappings
1209
+ // from the TargetLibraryInfo.
1210
+ OptimizePM.addPass (InjectTLIMappings ());
1211
+
1212
+ addVectorPasses (Level, OptimizePM, /* IsFullLTO */ false );
1213
+ }
1188
1214
1189
1215
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
1190
1216
// canonicalization pass that enables other optimizations. As a result,
0 commit comments