@@ -429,54 +429,54 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
   MPM.add(createReassociatePass());       // Reassociate expressions
 
-  // Begin the loop pass pipeline.
-  if (EnableSimpleLoopUnswitch) {
-    // The simple loop unswitch pass relies on separate cleanup passes. Schedule
-    // them first so when we re-process a loop they run before other loop
-    // passes.
-    MPM.add(createLoopInstSimplifyPass());
-    MPM.add(createLoopSimplifyCFGPass());
-  }
-  // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
-  // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-  // Rotate Loop - disable header duplication at -Oz
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
-  // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-  if (EnableSimpleLoopUnswitch)
-    MPM.add(createSimpleLoopUnswitchLegacyPass());
-  else
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-  // FIXME: We break the loop pass pipeline here in order to do full
-  // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace the
-  // need for this.
-  MPM.add(createCFGSimplificationPass());
-  MPM.add(createInstructionCombiningPass());
-  // We resume loop passes creating a second loop pipeline here.
-  if (EnableLoopFlatten) {
-    MPM.add(createLoopFlattenPass()); // Flatten loops
-    MPM.add(createLoopSimplifyCFGPass());
+  // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop
+  // optimizations rely on TTI, which is not accurate for SPIR target.
+  if (!SYCLOptimizationMode) {
+    // Begin the loop pass pipeline.
+    if (EnableSimpleLoopUnswitch) {
+      // The simple loop unswitch pass relies on separate cleanup passes.
+      // Schedule them first so when we re-process a loop they run before other
+      // loop passes.
+      MPM.add(createLoopInstSimplifyPass());
+      MPM.add(createLoopSimplifyCFGPass());
+    }
+    // Try to remove as much code from the loop header as possible,
+    // to reduce amount of IR that will have to be duplicated.
+    // TODO: Investigate promotion cap for O1.
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    // Rotate Loop - disable header duplication at -Oz
+    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+    // TODO: Investigate promotion cap for O1.
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    if (EnableSimpleLoopUnswitch)
+      MPM.add(createSimpleLoopUnswitchLegacyPass());
+    else
+      MPM.add(
+          createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+    // FIXME: We break the loop pass pipeline here in order to do full
+    // simplify-cfg. Eventually loop-simplifycfg should be enhanced to replace
+    // the need for this.
+    MPM.add(createCFGSimplificationPass());
+    MPM.add(createInstructionCombiningPass());
+    // We resume loop passes creating a second loop pipeline here.
+    if (EnableLoopFlatten) {
+      MPM.add(createLoopFlattenPass()); // Flatten loops
+      MPM.add(createLoopSimplifyCFGPass());
+    }
+    MPM.add(createLoopIdiomPass());      // Recognize idioms like memset.
+    MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+    addExtensionsToPM(EP_LateLoopOptimizations, MPM);
+    MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+    if (EnableLoopInterchange)
+      MPM.add(createLoopInterchangePass()); // Interchange loops
+
+    // Unroll small loops and perform peeling.
+    MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                       ForgetAllSCEVInLoopUnroll));
+    addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
+    // This ends the loop pass pipelines.
   }
-  MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
-  // TODO: this pass hurts performance due to promotions of induction variables
-  // from 32-bit value to 64-bit values. I assume it's because SPIR is a virtual
-  // target with unlimited # of registers and pass doesn't take into account
-  // that on real HW this promotion is not beneficial.
-  if (!SYCLOptimizationMode)
-    MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
-  addExtensionsToPM(EP_LateLoopOptimizations, MPM);
-  MPM.add(createLoopDeletionPass()); // Delete dead loops
-
-  if (EnableLoopInterchange)
-    MPM.add(createLoopInterchangePass()); // Interchange loops
-
-  // Unroll small loops and perform peeling.
-  MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
-                                     ForgetAllSCEVInLoopUnroll));
-  addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
-  // This ends the loop pass pipelines.
 
   // Break up allocas that may now be splittable after loop unrolling.
   MPM.add(createSROAPass());
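The guard above keys off a `SYCLOptimizationMode` flag on PassManagerBuilder. For context, a minimal sketch of how a driver might set it when targeting SPIR — the member name comes from this patch, while the surrounding setup and the triple check are illustrative assumptions, not part of the commit:

#include "llvm/ADT/Triple.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

using namespace llvm;

// Sketch: build a legacy -O2 pipeline with the loop passes suppressed for
// SPIR, where (per the comment in the hunk) TTI cost modelling is unreliable.
static void buildPipeline(legacy::PassManager &MPM, const Triple &TT) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  Builder.SizeLevel = 0;
  Builder.SYCLOptimizationMode = TT.isSPIR(); // member assumed from this patch
  Builder.populateModulePassManager(MPM);
}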
@@ -788,68 +788,74 @@ void PassManagerBuilder::populateModulePassManager(
 
   addExtensionsToPM(EP_VectorizerStart, MPM);
 
-  // Re-rotate loops in all our loop nests. These may have fallout out of
-  // rotated form due to GVN or other transformations, and the vectorizer relies
-  // on the rotated form. Disable header duplication at -Oz.
-  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
-
-  // Distribute loops to allow partial vectorization. I.e. isolate dependences
-  // into separate loop that would otherwise inhibit vectorization. This is
-  // currently only performed for loops marked with the metadata
-  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
-  MPM.add(createLoopDistributePass());
-
-  MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
-  // Eliminate loads by forwarding stores from the previous iteration to loads
-  // of the current iteration.
-  MPM.add(createLoopLoadEliminationPass());
-
-  // FIXME: Because of #pragma vectorize enable, the passes below are always
-  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
-  // on -O1 and no #pragma is found). Would be good to have these two passes
-  // as function calls, so that we can only pass them when the vectorizer
-  // changed the code.
-  MPM.add(createInstructionCombiningPass());
-  if (OptLevel > 1 && ExtraVectorizerPasses) {
-    // At higher optimization levels, try to clean up any runtime overlap and
-    // alignment checks inserted by the vectorizer. We want to track correllated
-    // runtime checks for two inner loops in the same outer loop, fold any
-    // common computations, hoist loop-invariant aspects out of any outer loop,
-    // and unswitch the runtime checks if possible. Once hoisted, we may have
-    // dead (or speculatable) control flows or more combining opportunities.
-    MPM.add(createEarlyCSEPass());
-    MPM.add(createCorrelatedValuePropagationPass());
-    MPM.add(createInstructionCombiningPass());
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    MPM.add(createCFGSimplificationPass());
+  if (!SYCLOptimizationMode) {
+    // Re-rotate loops in all our loop nests. These may have fallout out of
+    // rotated form due to GVN or other transformations, and the vectorizer
+    // relies on the rotated form. Disable header duplication at -Oz.
+    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
+
+    // Distribute loops to allow partial vectorization. I.e. isolate
+    // dependences into separate loop that would otherwise inhibit
+    // vectorization. This is currently only performed for loops marked with
+    // the metadata llvm.loop.distribute=true or when -enable-loop-distribute is
+    // specified.
+    MPM.add(createLoopDistributePass());
+
+    MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
+
+    // Eliminate loads by forwarding stores from the previous iteration to loads
+    // of the current iteration.
+    MPM.add(createLoopLoadEliminationPass());
+
+    // FIXME: Because of #pragma vectorize enable, the passes below are always
+    // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+    // on -O1 and no #pragma is found). Would be good to have these two passes
+    // as function calls, so that we can only pass them when the vectorizer
+    // changed the code.
     MPM.add(createInstructionCombiningPass());
-  }
-
-  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
-  // GVN, loop transforms, and others have already run, so it's now better to
-  // convert to more optimized IR using more aggressive simplify CFG options.
-  // The extra sinking transform can create larger basic blocks, so do this
-  // before SLP vectorization.
-  // FIXME: study whether hoisting and/or sinking of common instructions should
-  // be delayed until after SLP vectorizer.
-  MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
-                                          .forwardSwitchCondToPhi(true)
-                                          .convertSwitchToLookupTable(true)
-                                          .needCanonicalLoops(false)
-                                          .hoistCommonInsts(true)
-                                          .sinkCommonInsts(true)));
-
-  if (SLPVectorize) {
-    MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
     if (OptLevel > 1 && ExtraVectorizerPasses) {
+      // At higher optimization levels, try to clean up any runtime overlap and
+      // alignment checks inserted by the vectorizer. We want to track
+      // correllated runtime checks for two inner loops in the same outer loop,
+      // fold any common computations, hoist loop-invariant aspects out of any
+      // outer loop, and unswitch the runtime checks if possible. Once hoisted,
+      // we may have dead (or speculatable) control flows or more combining
+      // opportunities.
       MPM.add(createEarlyCSEPass());
+      MPM.add(createCorrelatedValuePropagationPass());
+      MPM.add(createInstructionCombiningPass());
+      MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+      MPM.add(
+          createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
+      MPM.add(createCFGSimplificationPass());
+      MPM.add(createInstructionCombiningPass());
     }
-  }
 
-  // Enhance/cleanup vector code.
-  MPM.add(createVectorCombinePass());
+    // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+    // GVN, loop transforms, and others have already run, so it's now better to
+    // convert to more optimized IR using more aggressive simplify CFG options.
+    // The extra sinking transform can create larger basic blocks, so do this
+    // before SLP vectorization.
+    // FIXME: study whether hoisting and/or sinking of common instructions
+    // should
+    // be delayed until after SLP vectorizer.
+    MPM.add(createCFGSimplificationPass(SimplifyCFGOptions()
+                                            .forwardSwitchCondToPhi(true)
+                                            .convertSwitchToLookupTable(true)
+                                            .needCanonicalLoops(false)
+                                            .hoistCommonInsts(true)
+                                            .sinkCommonInsts(true)));
+
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+      if (OptLevel > 1 && ExtraVectorizerPasses) {
+        MPM.add(createEarlyCSEPass());
+      }
+    }
+
+    // Enhance/cleanup vector code.
+    MPM.add(createVectorCombinePass());
+  }
 
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createInstructionCombiningPass());
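One subtlety preserved inside the new guard: `createLoopVectorizePass` takes "only when forced" parameters, so the user-facing flags are passed negated — disabling vectorization still schedules the pass, but it then transforms only loops that force it via `#pragma`/metadata, which is also why the FIXME above notes the follow-up passes are always inserted. A minimal sketch against the upstream signature from `llvm/Transforms/Vectorize.h`; the wrapper function here is illustrative, not from the patch:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

// Sketch: mirrors the call in the hunk above. When LoopVectorize is false,
// VectorizeOnlyWhenForced becomes true, so the pass only touches loops
// explicitly marked with llvm.loop.vectorize metadata.
void addVectorizer(legacy::PassManagerBase &MPM, bool LoopVectorize,
                   bool LoopsInterleaved) {
  MPM.add(createLoopVectorizePass(
      /*InterleaveOnlyWhenForced=*/!LoopsInterleaved,
      /*VectorizeOnlyWhenForced=*/!LoopVectorize));
}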
@@ -861,22 +867,24 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(createLoopUnrollAndJamPass(OptLevel));
   }
 
-  // Unroll small loops
-  MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
-                               ForgetAllSCEVInLoopUnroll));
+  if (!SYCLOptimizationMode) {
+    // Unroll small loops
+    MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+                                 ForgetAllSCEVInLoopUnroll));
 
-  if (!DisableUnrollLoops) {
-    // LoopUnroll may generate some redundency to cleanup.
-    MPM.add(createInstructionCombiningPass());
+    if (!DisableUnrollLoops) {
+      // LoopUnroll may generate some redundency to cleanup.
+      MPM.add(createInstructionCombiningPass());
 
-    // Runtime unrolling will introduce runtime check in loop prologue. If the
-    // unrolled loop is a inner loop, then the prologue will be inside the
-    // outer loop. LICM pass can help to promote the runtime check out if the
-    // checked value is loop invariant.
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
-  }
+      // Runtime unrolling will introduce runtime check in loop prologue. If the
+      // unrolled loop is a inner loop, then the prologue will be inside the
+      // outer loop. LICM pass can help to promote the runtime check out if the
+      // checked value is loop invariant.
+      MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    }
 
-  MPM.add(createWarnMissedTransformationsPass());
+    MPM.add(createWarnMissedTransformationsPass());
+  }
 
   // After vectorization and unrolling, assume intrinsics may tell us more
   // about pointer alignments.
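The "LICM after unroll" comment is easiest to see on a loop nest like the following illustrative example (not from the patch): runtime unrolling of the inner loop emits a trip-count/remainder check in its prologue, and because that check depends only on `n`, which is invariant in the outer loop, LICM can hoist it out of the outer loop.

// Illustrative C++ for the comment in the hunk above.
void scale(float *a, int n, int m) {
  for (int j = 0; j < m; ++j)   // outer loop
    for (int i = 0; i < n; ++i) // inner loop with runtime trip count n;
      a[i] *= 2.0f;             // unrolling adds a prologue check on n that
                                // is outer-loop invariant, hence hoistable
}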