@@ -1629,7 +1629,7 @@ struct OpenMPOpt {
1629
1629
for (auto *F : SCC) {
1630
1630
if (!F->isDeclaration ())
1631
1631
A.getOrCreateAAFor <AAExecutionDomain>(IRPosition::function (*F));
1632
- if (!OMPInfoCache. Kernels . empty ( ))
1632
+ if (isOpenMPDevice (M ))
1633
1633
A.getOrCreateAAFor <AAHeapToStack>(IRPosition::function (*F));
1634
1634
}
1635
1635
}
@@ -2629,17 +2629,18 @@ AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
2629
2629
}
2630
2630
2631
2631
PreservedAnalyses OpenMPOptPass::run (Module &M, ModuleAnalysisManager &AM) {
2632
- if (!containsOpenMP (M, OMPInModule ))
2632
+ if (!containsOpenMP (M))
2633
2633
return PreservedAnalyses::all ();
2634
-
2635
2634
if (DisableOpenMPOptimizations)
2636
2635
return PreservedAnalyses::all ();
2637
2636
2637
+ KernelSet Kernels = getDeviceKernels (M);
2638
+
2638
2639
// Create internal copies of each function if this is a kernel Module.
2639
2640
DenseSet<const Function *> InternalizedFuncs;
2640
- if (!OMPInModule. getKernels (). empty ( ))
2641
+ if (isOpenMPDevice (M ))
2641
2642
for (Function &F : M)
2642
- if (!F.isDeclaration () && !OMPInModule. getKernels () .contains (&F))
2643
+ if (!F.isDeclaration () && !Kernels .contains (&F))
2643
2644
if (Attributor::internalizeFunction (F, /* Force */ true ))
2644
2645
InternalizedFuncs.insert (&F);
2645
2646
@@ -2665,10 +2666,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2665
2666
CallGraphUpdater CGUpdater;
2666
2667
2667
2668
SetVector<Function *> Functions (SCC.begin (), SCC.end ());
2668
- OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ Functions,
2669
- OMPInModule.getKernels ());
2669
+ OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ Functions, Kernels);
2670
2670
2671
- unsigned MaxFixponitIterations = (!OMPInModule. getKernels () .empty ()) ? 64 : 32 ;
2671
+ unsigned MaxFixponitIterations = (isOpenMPDevice (M )) ? 64 : 32 ; // Device modules get the larger fixpoint budget, matching the CGSCC passes.
2672
2672
Attributor A (Functions, InfoCache, CGUpdater, nullptr , true , false , MaxFixponitIterations, OREGetter,
2673
2673
DEBUG_TYPE);
2674
2674
@@ -2684,30 +2684,25 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
2684
2684
CGSCCAnalysisManager &AM,
2685
2685
LazyCallGraph &CG,
2686
2686
CGSCCUpdateResult &UR) {
2687
- if (!containsOpenMP (*C.begin ()->getFunction ().getParent (), OMPInModule ))
2687
+ if (!containsOpenMP (*C.begin ()->getFunction ().getParent ()))
2688
2688
return PreservedAnalyses::all ();
2689
-
2690
2689
if (DisableOpenMPOptimizations)
2691
2690
return PreservedAnalyses::all ();
2692
2691
2693
2692
SmallVector<Function *, 16 > SCC;
2694
2693
// Collect every function of this SCC; modules without OpenMP were already rejected above.
2695
- bool SCCIsInteresting = !OMPInModule.getKernels ().empty ();
2696
2694
for (LazyCallGraph::Node &N : C) {
2697
2695
Function *Fn = &N.getFunction ();
2698
2696
SCC.push_back (Fn);
2699
-
2700
- // Do we already know that the SCC contains kernels,
2701
- // or that OpenMP functions are called from this SCC?
2702
- if (SCCIsInteresting)
2703
- continue ;
2704
- // If not, let's check that.
2705
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls (Fn);
2706
2697
}
2707
2698
2708
- if (!SCCIsInteresting || SCC.empty ())
2699
+ if (SCC.empty ())
2709
2700
return PreservedAnalyses::all ();
2710
2701
2702
+ Module &M = *C.begin ()->getFunction ().getParent ();
2703
+
2704
+ KernelSet Kernels = getDeviceKernels (M);
2705
+
2711
2706
FunctionAnalysisManager &FAM =
2712
2707
AM.getResult <FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager ();
2713
2708
@@ -2723,9 +2718,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
2723
2718
2724
2719
SetVector<Function *> Functions (SCC.begin (), SCC.end ());
2725
2720
OMPInformationCache InfoCache (*(Functions.back ()->getParent ()), AG, Allocator,
2726
- /* CGSCC*/ Functions, OMPInModule. getKernels () );
2721
+ /* CGSCC*/ Functions, Kernels );
2727
2722
2728
- unsigned MaxFixponitIterations = (!OMPInModule. getKernels (). empty ( )) ? 64 : 32 ;
2723
+ unsigned MaxFixponitIterations = (isOpenMPDevice (M )) ? 64 : 32 ;
2729
2724
Attributor A (Functions, InfoCache, CGUpdater, nullptr , false , true , MaxFixponitIterations, OREGetter,
2730
2725
DEBUG_TYPE);
2731
2726
@@ -2741,7 +2736,6 @@ namespace {
2741
2736
2742
2737
struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
2743
2738
CallGraphUpdater CGUpdater;
2744
- OpenMPInModule OMPInModule;
2745
2739
static char ID;
2746
2740
2747
2741
OpenMPOptCGSCCLegacyPass () : CallGraphSCCPass(ID) {
@@ -2752,38 +2746,27 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
2752
2746
CallGraphSCCPass::getAnalysisUsage (AU);
2753
2747
}
2754
2748
2755
- bool doInitialization (CallGraph &CG) override {
2756
- // Disable the pass if there is no OpenMP (runtime call) in the module.
2757
- containsOpenMP (CG.getModule (), OMPInModule);
2758
- return false ;
2759
- }
2760
-
2761
2749
bool runOnSCC (CallGraphSCC &CGSCC) override {
2762
- if (!containsOpenMP (CGSCC.getCallGraph ().getModule (), OMPInModule ))
2750
+ if (!containsOpenMP (CGSCC.getCallGraph ().getModule ()))
2763
2751
return false ;
2764
2752
if (DisableOpenMPOptimizations || skipSCC (CGSCC))
2765
2753
return false ;
2766
2754
2767
2755
SmallVector<Function *, 16 > SCC;
2768
2756
// Collect the defined functions of this SCC; null nodes and declarations are skipped.
2769
- bool SCCIsInteresting = !OMPInModule.getKernels ().empty ();
2770
2757
for (CallGraphNode *CGN : CGSCC) {
2771
2758
Function *Fn = CGN->getFunction ();
2772
2759
if (!Fn || Fn->isDeclaration ())
2773
2760
continue ;
2774
2761
SCC.push_back (Fn);
2775
-
2776
- // Do we already know that the SCC contains kernels,
2777
- // or that OpenMP functions are called from this SCC?
2778
- if (SCCIsInteresting)
2779
- continue ;
2780
- // If not, let's check that.
2781
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls (Fn);
2782
2762
}
2783
2763
2784
- if (!SCCIsInteresting || SCC.empty ())
2764
+ if (SCC.empty ())
2785
2765
return false ;
2786
2766
2767
+ Module &M = CGSCC.getCallGraph ().getModule ();
2768
+ KernelSet Kernels = getDeviceKernels (M);
2769
+
2787
2770
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph ();
2788
2771
CGUpdater.initialize (CG, CGSCC);
2789
2772
@@ -2799,11 +2782,11 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
2799
2782
AnalysisGetter AG;
2800
2783
SetVector<Function *> Functions (SCC.begin (), SCC.end ());
2801
2784
BumpPtrAllocator Allocator;
2802
- OMPInformationCache InfoCache (
2803
- *(Functions. back ()-> getParent ()), AG, Allocator,
2804
- /* CGSCC*/ Functions, OMPInModule. getKernels () );
2785
+ OMPInformationCache InfoCache (*(Functions. back ()-> getParent ()), AG,
2786
+ Allocator,
2787
+ /* CGSCC*/ Functions, Kernels );
2805
2788
2806
- unsigned MaxFixponitIterations = (!OMPInModule. getKernels (). empty ( )) ? 64 : 32 ;
2789
+ unsigned MaxFixponitIterations = (isOpenMPDevice (M )) ? 64 : 32 ;
2807
2790
Attributor A (Functions, InfoCache, CGUpdater, nullptr , false , true ,
2808
2791
MaxFixponitIterations, OREGetter, DEBUG_TYPE);
2809
2792
@@ -2816,11 +2799,13 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
2816
2799
2817
2800
} // end anonymous namespace
2818
2801
2819
- void OpenMPInModule::identifyKernels (Module &M) {
2820
-
2802
+ KernelSet llvm::omp::getDeviceKernels (Module &M) {
2803
+ // TODO: Create a more cross-platform way of determining device kernels.
2821
2804
NamedMDNode *MD = M.getNamedMetadata (" nvvm.annotations" ); // Lookup only: do not create the node, so the null check below is meaningful and the module is left unmodified.
2805
+ KernelSet Kernels;
2806
+
2822
2807
if (!MD)
2823
- return ;
2808
+ return Kernels ;
2824
2809
2825
2810
for (auto *Op : MD->operands ()) {
2826
2811
if (Op->getNumOperands () < 2 )
@@ -2838,38 +2823,24 @@ void OpenMPInModule::identifyKernels(Module &M) {
2838
2823
2839
2824
Kernels.insert (KernelFn);
2840
2825
}
2841
- }
2842
2826
2843
- bool llvm::omp::containsOpenMP (Module &M, OpenMPInModule &OMPInModule) {
2844
- if (OMPInModule.isKnown ())
2845
- return OMPInModule;
2827
+ return Kernels;
2828
+ }
2846
2829
2847
- auto RecordFunctionsContainingUsesOf = [&](Function *F) {
2848
- for (User *U : F->users ())
2849
- if (auto *I = dyn_cast<Instruction>(U))
2850
- OMPInModule.FuncsWithOMPRuntimeCalls .insert (I->getFunction ());
2851
- };
2830
+ bool llvm::omp::containsOpenMP (Module &M) {
2831
+ Metadata *MD = M.getModuleFlag (" openmp" );
2832
+ if (!MD)
2833
+ return false ;
2852
2834
2853
- // MSVC doesn't like long if-else chains for some reason and instead just
2854
- // issues an error. Work around it..
2855
- do {
2856
- #define OMP_RTL (_Enum, _Name, ...) \
2857
- if (Function *F = M.getFunction (_Name)) { \
2858
- RecordFunctionsContainingUsesOf (F); \
2859
- OMPInModule = true ; \
2860
- }
2861
- #include " llvm/Frontend/OpenMP/OMPKinds.def"
2862
- } while (false );
2835
+ return true ;
2836
+ }
2863
2837
2864
- // Identify kernels once. TODO: We should split the OMPInformationCache into a
2865
- // module and an SCC part. The kernel information, among other things, could
2866
- // go into the module part.
2867
- if (OMPInModule.isKnown () && OMPInModule) {
2868
- OMPInModule.identifyKernels (M);
2869
- return true ;
2870
- }
2838
+ bool llvm::omp::isOpenMPDevice (Module &M) {
2839
+ Metadata *MD = M.getModuleFlag (" openmp-device" );
2840
+ if (!MD)
2841
+ return false ;
2871
2842
2872
- return OMPInModule = false ;
2843
+ return true ;
2873
2844
}
2874
2845
2875
2846
char OpenMPOptCGSCCLegacyPass::ID = 0 ;
0 commit comments