Skip to content

Commit c21ceac

Browse files
SC llvm team authored and committed
Merged main:186a4b3b6578 into amd-gfx:7ad30b56268e
Local branch amd-gfx 7ad30b5 Merged main:3f0bddb56ac3 into amd-gfx:32727405140d Remote branch main 186a4b3 [LLVM][OpenMP] Allow OpenMPOpt to handle non-OpenMP target regions (llvm#67075)
2 parents 7ad30b5 + 186a4b3 commit c21ceac

File tree

4 files changed

+51
-14
lines changed

4 files changed

+51
-14
lines changed

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 475744
19+
#define LLVM_MAIN_REVISION 475745
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Transforms/IPO/OpenMPOpt.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,8 +29,9 @@ bool containsOpenMP(Module &M);
2929
/// Helper to determine if \p M is a OpenMP target offloading device module.
3030
bool isOpenMPDevice(Module &M);
3131

32-
/// Return true iff \p Fn is a GPU kernel; \p Fn has the "kernel" attribute.
33-
bool isKernel(Function &Fn);
32+
/// Return true iff \p Fn is an OpenMP GPU kernel; \p Fn has the "kernel"
33+
/// attribute.
34+
bool isOpenMPKernel(Function &Fn);
3435

3536
/// Get OpenMP device kernels in \p M.
3637
KernelSet getDeviceKernels(Module &M);

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,8 @@ STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
158158
"Number of OpenMP runtime function uses identified");
159159
STATISTIC(NumOpenMPTargetRegionKernels,
160160
"Number of OpenMP target region entry points (=kernels) identified");
161+
STATISTIC(NumNonOpenMPTargetRegionKernels,
162+
"Number of non-OpenMP target region kernels identified");
161163
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
162164
"Number of OpenMP target region entry points (=kernels) executed in "
163165
"SPMD-mode instead of generic-mode");
@@ -989,7 +991,7 @@ struct OpenMPOpt {
989991
/// Print OpenMP GPU kernels for testing.
990992
void printKernels() const {
991993
for (Function *F : SCC) {
992-
if (!omp::isKernel(*F))
994+
if (!omp::isOpenMPKernel(*F))
993995
continue;
994996

995997
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
@@ -2030,7 +2032,7 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
20302032
// TODO: We should use an AA to create an (optimistic and callback
20312033
// call-aware) call graph. For now we stick to simple patterns that
20322034
// are less powerful, basically the worst fixpoint.
2033-
if (isKernel(F)) {
2035+
if (isOpenMPKernel(F)) {
20342036
CachedKernel = Kernel(&F);
20352037
return *CachedKernel;
20362038
}
@@ -2721,7 +2723,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
27212723
HandleAlignedBarrier(CB);
27222724

27232725
// Handle the "kernel end barrier" for kernels too.
2724-
if (omp::isKernel(*getAnchorScope()))
2726+
if (omp::isOpenMPKernel(*getAnchorScope()))
27252727
HandleAlignedBarrier(nullptr);
27262728

27272729
return Changed;
@@ -2974,7 +2976,7 @@ bool AAExecutionDomainFunction::handleCallees(Attributor &A,
29742976
} else {
29752977
// We could not find all predecessors, so this is either a kernel or a
29762978
// function with external linkage (or with some other weird uses).
2977-
if (omp::isKernel(*getAnchorScope())) {
2979+
if (omp::isOpenMPKernel(*getAnchorScope())) {
29782980
EntryBBED.IsExecutedByInitialThreadOnly = false;
29792981
EntryBBED.IsReachedFromAlignedBarrierOnly = true;
29802982
EntryBBED.EncounteredNonLocalSideEffect = false;
@@ -3028,7 +3030,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
30283030

30293031
Function *F = getAnchorScope();
30303032
BasicBlock &EntryBB = F->getEntryBlock();
3031-
bool IsKernel = omp::isKernel(*F);
3033+
bool IsKernel = omp::isOpenMPKernel(*F);
30323034

30333035
SmallVector<Instruction *> SyncInstWorklist;
30343036
for (auto &RIt : *RPOT) {
@@ -4167,7 +4169,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
41674169
auto *CB = cast<CallBase>(Kernel->user_back());
41684170
Kernel = CB->getCaller();
41694171
}
4170-
assert(omp::isKernel(*Kernel) && "Expected kernel function!");
4172+
assert(omp::isOpenMPKernel(*Kernel) && "Expected kernel function!");
41714173

41724174
// Check if the kernel is already in SPMD mode, if so, return success.
41734175
ConstantStruct *ExistingKernelEnvC =
@@ -5804,7 +5806,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
58045806
return PreservedAnalyses::all();
58055807
}
58065808

5807-
bool llvm::omp::isKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); }
5809+
bool llvm::omp::isOpenMPKernel(Function &Fn) {
5810+
return Fn.hasFnAttribute("kernel");
5811+
}
58085812

58095813
KernelSet llvm::omp::getDeviceKernels(Module &M) {
58105814
// TODO: Create a more cross-platform way of determining device kernels.
@@ -5826,10 +5830,13 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
58265830
if (!KernelFn)
58275831
continue;
58285832

5829-
assert(isKernel(*KernelFn) && "Inconsistent kernel function annotation");
5830-
++NumOpenMPTargetRegionKernels;
5831-
5832-
Kernels.insert(KernelFn);
5833+
// We are only interested in OpenMP target regions. Others, such as kernels
5834+
// generated by CUDA but linked together, are not interesting to this pass.
5835+
if (isOpenMPKernel(*KernelFn)) {
5836+
++NumOpenMPTargetRegionKernels;
5837+
Kernels.insert(KernelFn);
5838+
} else
5839+
++NumNonOpenMPTargetRegionKernels;
58335840
}
58345841

58355842
return Kernels;
Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 3
2+
; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s
3+
4+
source_filename = "bug66687.ll"
5+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
6+
target triple = "nvptx64-nvidia-cuda"
7+
8+
define weak void @openmp_kernel() "kernel" {
9+
; CHECK-LABEL: define weak void @openmp_kernel(
10+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: ret void
12+
;
13+
ret void
14+
}
15+
16+
define weak_odr void @non_openmp_kernel() {
17+
; CHECK-LABEL: define weak_odr void @non_openmp_kernel() {
18+
; CHECK-NEXT: ret void
19+
;
20+
ret void
21+
}
22+
23+
!llvm.module.flags = !{!0, !1}
24+
!nvvm.annotations = !{!2, !3}
25+
26+
!0 = !{i32 7, !"openmp", i32 51}
27+
!1 = !{i32 7, !"openmp-device", i32 51}
28+
!2 = !{ptr @openmp_kernel, !"kernel", i32 1}
29+
!3 = !{ptr @non_openmp_kernel, !"kernel", i32 1}

0 commit comments

Comments
 (0)