Skip to content

Commit 5ccb742

Browse files
jhuber6 authored and jhuber-ornl committed
[OpenMP] Change OpenMPOpt to check openmp metadata
The metadata added in D102361 introduces a module flag that we can check to determine if the module was compiled with `-fopenmp` enabled. We can now check for the presence of this instead of scanning the call graph for OpenMP runtime functions. Depends on D102361 Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D102423
1 parent 9ce02ea commit 5ccb742

21 files changed

+139
-150
lines changed

llvm/include/llvm/Transforms/IPO/OpenMPOpt.h

Lines changed: 8 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -20,60 +20,27 @@ namespace omp {
2020
/// Summary of a kernel (=entry point for target offloading).
2121
using Kernel = Function *;
2222

23-
/// Helper to remember if the module contains OpenMP (runtime calls), to be used
24-
/// foremost with containsOpenMP.
25-
struct OpenMPInModule {
26-
OpenMPInModule &operator=(bool Found) {
27-
if (Found)
28-
Value = OpenMPInModule::OpenMP::FOUND;
29-
else
30-
Value = OpenMPInModule::OpenMP::NOT_FOUND;
31-
return *this;
32-
}
33-
bool isKnown() { return Value != OpenMP::UNKNOWN; }
34-
operator bool() { return Value != OpenMP::NOT_FOUND; }
23+
/// Set of kernels in the module
24+
using KernelSet = SmallPtrSet<Kernel, 4>;
3525

36-
/// Does this function \p F contain any OpenMP runtime calls?
37-
bool containsOMPRuntimeCalls(Function *F) const {
38-
return FuncsWithOMPRuntimeCalls.contains(F);
39-
}
26+
/// Helper to determine if \p M contains OpenMP.
27+
bool containsOpenMP(Module &M);
4028

41-
/// Return the known kernels (=GPU entry points) in the module.
42-
SmallPtrSetImpl<Kernel> &getKernels() { return Kernels; }
29+
/// Helper to determine if \p M is an OpenMP target offloading device module.
30+
bool isOpenMPDevice(Module &M);
4331

44-
/// Identify kernels in the module and populate the Kernels set.
45-
void identifyKernels(Module &M);
46-
47-
private:
48-
enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
49-
50-
friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
51-
52-
/// In which functions are OpenMP runtime calls present?
53-
SmallPtrSet<Function *, 32> FuncsWithOMPRuntimeCalls;
54-
55-
/// Collection of known kernels (=GPU entry points) in the module.
56-
SmallPtrSet<Kernel, 8> Kernels;
57-
};
58-
59-
/// Helper to determine if \p M contains OpenMP (runtime calls).
60-
bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
32+
/// Get OpenMP device kernels in \p M.
33+
KernelSet getDeviceKernels(Module &M);
6134

6235
} // namespace omp
6336

6437
/// OpenMP optimizations pass.
6538
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
66-
/// Helper to remember if the module contains OpenMP (runtime calls).
67-
omp::OpenMPInModule OMPInModule;
68-
6939
public:
7040
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
7141
};
7242

7343
class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
74-
/// Helper to remember if the module contains OpenMP (runtime calls).
75-
omp::OpenMPInModule OMPInModule;
76-
7744
public:
7845
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
7946
LazyCallGraph &CG, CGSCCUpdateResult &UR);

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 43 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,7 +1629,7 @@ struct OpenMPOpt {
16291629
for (auto *F : SCC) {
16301630
if (!F->isDeclaration())
16311631
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
1632-
if (!OMPInfoCache.Kernels.empty())
1632+
if (isOpenMPDevice(M))
16331633
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
16341634
}
16351635
}
@@ -2629,17 +2629,18 @@ AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
26292629
}
26302630

26312631
PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2632-
if (!containsOpenMP(M, OMPInModule))
2632+
if (!containsOpenMP(M))
26332633
return PreservedAnalyses::all();
2634-
26352634
if (DisableOpenMPOptimizations)
26362635
return PreservedAnalyses::all();
26372636

2637+
KernelSet Kernels = getDeviceKernels(M);
2638+
26382639
// Create internal copies of each function if this is a kernel Module.
26392640
DenseSet<const Function *> InternalizedFuncs;
2640-
if (!OMPInModule.getKernels().empty())
2641+
if (isOpenMPDevice(M))
26412642
for (Function &F : M)
2642-
if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F))
2643+
if (!F.isDeclaration() && !Kernels.contains(&F))
26432644
if (Attributor::internalizeFunction(F, /* Force */ true))
26442645
InternalizedFuncs.insert(&F);
26452646

@@ -2665,10 +2666,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
26652666
CallGraphUpdater CGUpdater;
26662667

26672668
SetVector<Function *> Functions(SCC.begin(), SCC.end());
2668-
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions,
2669-
OMPInModule.getKernels());
2669+
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
26702670

2671-
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
2671+
unsigned MaxFixponitIterations = (Kernels.empty()) ? 64 : 32;
26722672
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter,
26732673
DEBUG_TYPE);
26742674

@@ -2684,30 +2684,25 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
26842684
CGSCCAnalysisManager &AM,
26852685
LazyCallGraph &CG,
26862686
CGSCCUpdateResult &UR) {
2687-
if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
2687+
if (!containsOpenMP(*C.begin()->getFunction().getParent()))
26882688
return PreservedAnalyses::all();
2689-
26902689
if (DisableOpenMPOptimizations)
26912690
return PreservedAnalyses::all();
26922691

26932692
SmallVector<Function *, 16> SCC;
26942693
// If there are kernels in the module, we have to run on all SCC's.
2695-
bool SCCIsInteresting = !OMPInModule.getKernels().empty();
26962694
for (LazyCallGraph::Node &N : C) {
26972695
Function *Fn = &N.getFunction();
26982696
SCC.push_back(Fn);
2699-
2700-
// Do we already know that the SCC contains kernels,
2701-
// or that OpenMP functions are called from this SCC?
2702-
if (SCCIsInteresting)
2703-
continue;
2704-
// If not, let's check that.
2705-
SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
27062697
}
27072698

2708-
if (!SCCIsInteresting || SCC.empty())
2699+
if (SCC.empty())
27092700
return PreservedAnalyses::all();
27102701

2702+
Module &M = *C.begin()->getFunction().getParent();
2703+
2704+
KernelSet Kernels = getDeviceKernels(M);
2705+
27112706
FunctionAnalysisManager &FAM =
27122707
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
27132708

@@ -2723,9 +2718,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
27232718

27242719
SetVector<Function *> Functions(SCC.begin(), SCC.end());
27252720
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
2726-
/*CGSCC*/ Functions, OMPInModule.getKernels());
2721+
/*CGSCC*/ Functions, Kernels);
27272722

2728-
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
2723+
unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
27292724
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter,
27302725
DEBUG_TYPE);
27312726

@@ -2741,7 +2736,6 @@ namespace {
27412736

27422737
struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27432738
CallGraphUpdater CGUpdater;
2744-
OpenMPInModule OMPInModule;
27452739
static char ID;
27462740

27472741
OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
@@ -2752,38 +2746,27 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27522746
CallGraphSCCPass::getAnalysisUsage(AU);
27532747
}
27542748

2755-
bool doInitialization(CallGraph &CG) override {
2756-
// Disable the pass if there is no OpenMP (runtime call) in the module.
2757-
containsOpenMP(CG.getModule(), OMPInModule);
2758-
return false;
2759-
}
2760-
27612749
bool runOnSCC(CallGraphSCC &CGSCC) override {
2762-
if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
2750+
if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
27632751
return false;
27642752
if (DisableOpenMPOptimizations || skipSCC(CGSCC))
27652753
return false;
27662754

27672755
SmallVector<Function *, 16> SCC;
27682756
// If there are kernels in the module, we have to run on all SCC's.
2769-
bool SCCIsInteresting = !OMPInModule.getKernels().empty();
27702757
for (CallGraphNode *CGN : CGSCC) {
27712758
Function *Fn = CGN->getFunction();
27722759
if (!Fn || Fn->isDeclaration())
27732760
continue;
27742761
SCC.push_back(Fn);
2775-
2776-
// Do we already know that the SCC contains kernels,
2777-
// or that OpenMP functions are called from this SCC?
2778-
if (SCCIsInteresting)
2779-
continue;
2780-
// If not, let's check that.
2781-
SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn);
27822762
}
27832763

2784-
if (!SCCIsInteresting || SCC.empty())
2764+
if (SCC.empty())
27852765
return false;
27862766

2767+
Module &M = CGSCC.getCallGraph().getModule();
2768+
KernelSet Kernels = getDeviceKernels(M);
2769+
27872770
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
27882771
CGUpdater.initialize(CG, CGSCC);
27892772

@@ -2799,11 +2782,11 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
27992782
AnalysisGetter AG;
28002783
SetVector<Function *> Functions(SCC.begin(), SCC.end());
28012784
BumpPtrAllocator Allocator;
2802-
OMPInformationCache InfoCache(
2803-
*(Functions.back()->getParent()), AG, Allocator,
2804-
/*CGSCC*/ Functions, OMPInModule.getKernels());
2785+
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
2786+
Allocator,
2787+
/*CGSCC*/ Functions, Kernels);
28052788

2806-
unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32;
2789+
unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32;
28072790
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
28082791
MaxFixponitIterations, OREGetter, DEBUG_TYPE);
28092792

@@ -2816,11 +2799,13 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
28162799

28172800
} // end anonymous namespace
28182801

2819-
void OpenMPInModule::identifyKernels(Module &M) {
2820-
2802+
KernelSet llvm::omp::getDeviceKernels(Module &M) {
2803+
// TODO: Create a more cross-platform way of determining device kernels.
28212804
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
2805+
KernelSet Kernels;
2806+
28222807
if (!MD)
2823-
return;
2808+
return Kernels;
28242809

28252810
for (auto *Op : MD->operands()) {
28262811
if (Op->getNumOperands() < 2)
@@ -2838,38 +2823,24 @@ void OpenMPInModule::identifyKernels(Module &M) {
28382823

28392824
Kernels.insert(KernelFn);
28402825
}
2841-
}
28422826

2843-
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
2844-
if (OMPInModule.isKnown())
2845-
return OMPInModule;
2827+
return Kernels;
2828+
}
28462829

2847-
auto RecordFunctionsContainingUsesOf = [&](Function *F) {
2848-
for (User *U : F->users())
2849-
if (auto *I = dyn_cast<Instruction>(U))
2850-
OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction());
2851-
};
2830+
bool llvm::omp::containsOpenMP(Module &M) {
2831+
Metadata *MD = M.getModuleFlag("openmp");
2832+
if (!MD)
2833+
return false;
28522834

2853-
// MSVC doesn't like long if-else chains for some reason and instead just
2854-
// issues an error. Work around it..
2855-
do {
2856-
#define OMP_RTL(_Enum, _Name, ...) \
2857-
if (Function *F = M.getFunction(_Name)) { \
2858-
RecordFunctionsContainingUsesOf(F); \
2859-
OMPInModule = true; \
2860-
}
2861-
#include "llvm/Frontend/OpenMP/OMPKinds.def"
2862-
} while (false);
2835+
return true;
2836+
}
28632837

2864-
// Identify kernels once. TODO: We should split the OMPInformationCache into a
2865-
// module and an SCC part. The kernel information, among other things, could
2866-
// go into the module part.
2867-
if (OMPInModule.isKnown() && OMPInModule) {
2868-
OMPInModule.identifyKernels(M);
2869-
return true;
2870-
}
2838+
bool llvm::omp::isOpenMPDevice(Module &M) {
2839+
Metadata *MD = M.getModuleFlag("openmp-device");
2840+
if (!MD)
2841+
return false;
28712842

2872-
return OMPInModule = false;
2843+
return true;
28732844
}
28742845

28752846
char OpenMPOptCGSCCLegacyPass::ID = 0;

llvm/test/Transforms/OpenMP/add_attributes.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,3 +1739,6 @@ attributes #0 = { noinline }
17391739
; OPTIMISTIC: ; Function Attrs: convergent noinline nounwind
17401740
; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32)
17411741

1742+
!llvm.module.flags = !{!0}
1743+
1744+
!0 = !{i32 7, !"openmp", i32 50}

llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,7 @@ declare void @__kmpc_syncwarp(i64)
2626

2727
; OPTIMISTIC: ; Function Attrs: convergent nounwind
2828
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
29+
30+
!llvm.module.flags = !{!0}
31+
32+
!0 = !{i32 7, !"openmp", i32 50}

llvm/test/Transforms/OpenMP/deduplication.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,7 @@ entry:
221221
call void @useI32(i32 %tid5)
222222
ret void
223223
}
224+
225+
!llvm.module.flags = !{!0}
226+
227+
!0 = !{i32 7, !"openmp", i32 50}

llvm/test/Transforms/OpenMP/deduplication_remarks.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ declare !dbg !4 void @useI32(i32) local_unnamed_addr
3030
declare void @llvm.dbg.value(metadata, metadata, metadata)
3131

3232
!llvm.dbg.cu = !{!0}
33-
!llvm.module.flags = !{!8, !9, !10, !11, !12}
33+
!llvm.module.flags = !{!8, !9, !10, !11, !12, !29}
3434
!llvm.ident = !{!13}
3535

3636
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
@@ -62,3 +62,4 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
6262
!26 = !DILocation(line: 9, column: 10, scope: !14)
6363
!27 = !DILocation(line: 10, column: 2, scope: !14)
6464
!28 = !DILocation(line: 13, column: 1, scope: !14)
65+
!29 = !{i32 7, !"openmp", i32 50}

llvm/test/Transforms/OpenMP/globalization_remarks.ll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@ source_filename = "declare_target_codegen_globalization.cpp"
44
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
55
target triple = "nvptx64"
66

7+
; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured.
78
; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
89

910
@S = external local_unnamed_addr global i8*
1011

1112
define void @foo() {
1213
entry:
13-
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8
14+
%0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
1415
%x_on_stack = bitcast i8* %0 to i32*
1516
%1 = bitcast i32* %x_on_stack to i8*
1617
call void @share(i8* %1)
@@ -30,13 +31,17 @@ declare void @__kmpc_free_shared(i8*)
3031

3132

3233
!llvm.dbg.cu = !{!0}
33-
!llvm.module.flags = !{!3, !4}
34+
!llvm.module.flags = !{!3, !4, !5, !6}
35+
!nvvm.annotations = !{!7, !8}
3436

3537
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
3638
!1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c")
3739
!2 = !{}
3840
!3 = !{i32 2, !"Debug Info Version", i32 3}
3941
!4 = !{i32 1, !"wchar_size", i32 4}
40-
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
41-
!7 = !DISubroutineType(types: !2)
42-
!8 = !DILocation(line: 5, column: 7, scope: !6)
42+
!5 = !{i32 7, !"openmp", i32 50}
43+
!6 = !{i32 7, !"openmp-device", i32 50}
44+
!7 = !{void ()* @foo, !"kernel", i32 1}
45+
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
46+
!9 = !DISubroutineType(types: !2)
47+
!10 = !DILocation(line: 5, column: 7, scope: !8)

llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ define void @non_kernel() {
1919
; Needed to trigger the openmp-opt pass
2020
declare dso_local void @__kmpc_kernel_prepare_parallel(i8*)
2121

22+
!llvm.module.flags = !{!4}
2223
!nvvm.annotations = !{!2, !0, !1, !3, !1, !2}
2324

2425
!0 = !{void ()* @kernel1, !"kernel", i32 1}
2526
!1 = !{void ()* @non_kernel, !"non_kernel", i32 1}
2627
!2 = !{null, !"align", i32 1}
2728
!3 = !{void ()* @kernel2, !"kernel", i32 1}
29+
!4 = !{i32 7, !"openmp", i32 50}

llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,5 +275,8 @@ declare void @__kmpc_kernel_end_parallel()
275275

276276

277277
!nvvm.annotations = !{!1}
278+
!llvm.module.flags = !{!2, !3}
278279

279280
!1 = !{void ()* @__omp_offloading_50_6dfa0f01_foo_l6, !"kernel", i32 1}
281+
!2 = !{i32 7, !"openmp", i32 50}
282+
!3 = !{i32 7, !"openmp-device", i32 50}

0 commit comments

Comments
 (0)