@@ -158,6 +158,8 @@ STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
158
158
" Number of OpenMP runtime function uses identified" );
159
159
// Statistic counter for OpenMP target region kernels. In the code shown here
// it is incremented in getDeviceKernels for every kernel function that passes
// the isOpenMPKernel check, right before that function is inserted into the
// kernel set that getDeviceKernels returns.
STATISTIC (NumOpenMPTargetRegionKernels,
160
160
" Number of OpenMP target region entry points (=kernels) identified" );
161
// Statistic counter for kernel functions that getDeviceKernels finds but does
// not collect because isOpenMPKernel is false for them (the comment at the
// increment site names CUDA kernels linked into the module as an example).
// Such functions are counted here and are not added to the kernel set.
+ STATISTIC (NumNonOpenMPTargetRegionKernels,
162
+ " Number of non-OpenMP target region kernels identified" );
161
163
// Statistic counter for kernels executed in SPMD-mode instead of generic-mode
// (per the description string below).
// NOTE(review): the place where this counter is incremented is not part of
// this excerpt -- confirm against the SPMD-mode handling code.
STATISTIC (NumOpenMPTargetRegionKernelsSPMD,
162
164
" Number of OpenMP target region entry points (=kernels) executed in "
163
165
" SPMD-mode instead of generic-mode" );
@@ -989,7 +991,7 @@ struct OpenMPOpt {
989
991
/// Print OpenMP GPU kernels for testing.
990
992
void printKernels () const {
991
993
for (Function *F : SCC) {
992
- if (!omp::isKernel (*F))
994
+ if (!omp::isOpenMPKernel (*F))
993
995
continue ;
994
996
995
997
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
@@ -2030,7 +2032,7 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
2030
2032
// TODO: We should use an AA to create an (optimistic and callback
2031
2033
// call-aware) call graph. For now we stick to simple patterns that
2032
2034
// are less powerful, basically the worst fixpoint.
2033
- if (isKernel (F)) {
2035
+ if (isOpenMPKernel (F)) {
2034
2036
CachedKernel = Kernel (&F);
2035
2037
return *CachedKernel;
2036
2038
}
@@ -2721,7 +2723,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
2721
2723
HandleAlignedBarrier (CB);
2722
2724
2723
2725
// Handle the "kernel end barrier" for kernels too.
2724
- if (omp::isKernel (*getAnchorScope ()))
2726
+ if (omp::isOpenMPKernel (*getAnchorScope ()))
2725
2727
HandleAlignedBarrier (nullptr );
2726
2728
2727
2729
return Changed;
@@ -2974,7 +2976,7 @@ bool AAExecutionDomainFunction::handleCallees(Attributor &A,
2974
2976
} else {
2975
2977
// We could not find all predecessors, so this is either a kernel or a
2976
2978
// function with external linkage (or with some other weird uses).
2977
- if (omp::isKernel (*getAnchorScope ())) {
2979
+ if (omp::isOpenMPKernel (*getAnchorScope ())) {
2978
2980
EntryBBED.IsExecutedByInitialThreadOnly = false ;
2979
2981
EntryBBED.IsReachedFromAlignedBarrierOnly = true ;
2980
2982
EntryBBED.EncounteredNonLocalSideEffect = false ;
@@ -3028,7 +3030,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
3028
3030
3029
3031
Function *F = getAnchorScope ();
3030
3032
BasicBlock &EntryBB = F->getEntryBlock ();
3031
- bool IsKernel = omp::isKernel (*F);
3033
+ bool IsKernel = omp::isOpenMPKernel (*F);
3032
3034
3033
3035
SmallVector<Instruction *> SyncInstWorklist;
3034
3036
for (auto &RIt : *RPOT) {
@@ -4167,7 +4169,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
4167
4169
auto *CB = cast<CallBase>(Kernel->user_back ());
4168
4170
Kernel = CB->getCaller ();
4169
4171
}
4170
- assert (omp::isKernel (*Kernel) && " Expected kernel function!" );
4172
+ assert (omp::isOpenMPKernel (*Kernel) && " Expected kernel function!" );
4171
4173
4172
4174
// Check if the kernel is already in SPMD mode, if so, return success.
4173
4175
ConstantStruct *ExistingKernelEnvC =
@@ -5804,7 +5806,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
5804
5806
return PreservedAnalyses::all ();
5805
5807
}
5806
5808
5807
- bool llvm::omp::isKernel (Function &Fn) { return Fn.hasFnAttribute (" kernel" ); }
5809
// Returns true if Fn carries the function attribute named by the string
// literal below, false otherwise. This is a pure query on Fn's attributes.
// Callers in this file use the result to decide whether a function is treated
// as an OpenMP kernel; for example, getDeviceKernels only collects functions
// for which this returns true and merely counts the others.
+ bool llvm::omp::isOpenMPKernel (Function &Fn) {
5810
+ return Fn.hasFnAttribute (" kernel" );
5811
+ }
5808
5812
5809
5813
KernelSet llvm::omp::getDeviceKernels (Module &M) {
5810
5814
// TODO: Create a more cross-platform way of determining device kernels.
@@ -5826,10 +5830,13 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
5826
5830
if (!KernelFn)
5827
5831
continue ;
5828
5832
5829
- assert (isKernel (*KernelFn) && " Inconsistent kernel function annotation" );
5830
- ++NumOpenMPTargetRegionKernels;
5831
-
5832
- Kernels.insert (KernelFn);
5833
+ // We are only interested in OpenMP target regions. Others, such as kernels
5834
+ // generated by CUDA but linked together, are not interesting to this pass.
5835
+ if (isOpenMPKernel (*KernelFn)) {
5836
+ ++NumOpenMPTargetRegionKernels;
5837
+ Kernels.insert (KernelFn);
5838
+ } else
5839
+ ++NumNonOpenMPTargetRegionKernels;
5833
5840
}
5834
5841
5835
5842
return Kernels;
0 commit comments