Skip to content

Commit 0bdde9d

Browse files
committed
[OpenMP] Make OpenMPOpt aware of the OpenMP runtime's status
The `OpenMPOpt` pass contains optimizations that generate new calls into the OpenMP runtime. This causes problems if we are in a state where the runtime has already been linked statically. Generating these new calls will result in them never being resolved. We should indicate if we are in a "post-link" LTO phase and prevent OpenMPOpt from generating new runtime calls. Generally, it's not desirable for passes to maintain state about the context in which they're called. But this is the only reasonable solution to static linking when we have a pass that generates new runtime calls. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D142646
1 parent b4b06d8 commit 0bdde9d

File tree

6 files changed

+67
-8
lines changed

6 files changed

+67
-8
lines changed

llvm/include/llvm/Transforms/IPO/OpenMPOpt.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,25 @@ KernelSet getDeviceKernels(Module &M);
3737
/// OpenMP optimizations pass.
3838
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
3939
public:
40+
OpenMPOptPass() : LTOPhase(ThinOrFullLTOPhase::None) {}
41+
OpenMPOptPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}
42+
4043
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
44+
45+
private:
46+
const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;
4147
};
4248

4349
class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
4450
public:
51+
OpenMPOptCGSCCPass() : LTOPhase(ThinOrFullLTOPhase::None) {}
52+
OpenMPOptCGSCCPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}
53+
4554
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
4655
LazyCallGraph &CG, CGSCCUpdateResult &UR);
56+
57+
private:
58+
const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;
4759
};
4860

4961
} // end namespace llvm

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,7 +1604,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
16041604
}
16051605

16061606
// Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1607-
MPM.addPass(OpenMPOptPass());
1607+
MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
16081608

16091609
// Remove unused virtual tables to improve the quality of code generated by
16101610
// whole-program devirtualization and bitset lowering.
@@ -1808,7 +1808,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18081808
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
18091809

18101810
// Run the OpenMPOpt CGSCC pass again late.
1811-
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
1811+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1812+
OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
18121813

18131814
invokePeepholeEPCallbacks(MainFPM, Level);
18141815
MainFPM.addPass(JumpThreadingPass());

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ MODULE_PASS("always-inline", AlwaysInlinerPass())
4444
MODULE_PASS("attributor", AttributorPass())
4545
MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
4646
MODULE_PASS("openmp-opt", OpenMPOptPass())
47+
MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
4748
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
4849
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
4950
MODULE_PASS("cg-profile", CGProfilePass())

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ struct AAICVTracker;
188188
struct OMPInformationCache : public InformationCache {
189189
OMPInformationCache(Module &M, AnalysisGetter &AG,
190190
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
191-
KernelSet &Kernels)
191+
KernelSet &Kernels, bool OpenMPPostLink)
192192
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
193-
Kernels(Kernels) {
193+
Kernels(Kernels), OpenMPPostLink(OpenMPPostLink) {
194194

195195
OMPBuilder.initialize();
196196
initializeRuntimeFunctions(M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
448448
CI->setCallingConv(Fn->getCallingConv());
449449
}
450450

451+
// Helper function to determine if it's legal to create a call to the runtime
452+
// functions.
453+
bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
454+
// We can always emit calls if we haven't yet linked in the runtime.
455+
if (!OpenMPPostLink)
456+
return true;
457+
458+
// Once the runtime has already been linked in we cannot emit calls to
459+
// any undefined functions.
460+
for (RuntimeFunction Fn : Fns) {
461+
RuntimeFunctionInfo &RFI = RFIs[Fn];
462+
463+
if (RFI.Declaration && RFI.Declaration->isDeclaration())
464+
return false;
465+
}
466+
return true;
467+
}
468+
451469
/// Helper to initialize all runtime function information for those defined
452470
/// in OpenMPKinds.def.
453471
void initializeRuntimeFunctions(Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
523541

524542
/// Collection of known OpenMP runtime functions.
525543
DenseSet<const Function *> RTLFunctions;
544+
545+
/// Indicates if we have already linked in the OpenMP device library.
546+
bool OpenMPPostLink = false;
526547
};
527548

528549
template <typename Ty, bool InsertInvalidates = true>
@@ -1412,7 +1433,10 @@ struct OpenMPOpt {
14121433
Changed |= WasSplit;
14131434
return WasSplit;
14141435
};
1415-
RFI.foreachUse(SCC, SplitMemTransfers);
1436+
if (OMPInfoCache.runtimeFnsAvailable(
1437+
{OMPRTL___tgt_target_data_begin_mapper_issue,
1438+
OMPRTL___tgt_target_data_begin_mapper_wait}))
1439+
RFI.foreachUse(SCC, SplitMemTransfers);
14161440

14171441
return Changed;
14181442
}
@@ -3912,6 +3936,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
39123936
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
39133937
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
39143938

3939+
// We cannot change to SPMD mode if the runtime functions aren't available.
3940+
if (!OMPInfoCache.runtimeFnsAvailable(
3941+
{OMPRTL___kmpc_get_hardware_thread_id_in_block,
3942+
OMPRTL___kmpc_barrier_simple_spmd}))
3943+
return false;
3944+
39153945
if (!SPMDCompatibilityTracker.isAssumed()) {
39163946
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
39173947
if (!NonCompatibleI)
@@ -4019,6 +4049,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
40194049
if (!ReachedKnownParallelRegions.isValidState())
40204050
return ChangeStatus::UNCHANGED;
40214051

4052+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4053+
if (!OMPInfoCache.runtimeFnsAvailable(
4054+
{OMPRTL___kmpc_get_hardware_num_threads_in_block,
4055+
OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
4056+
OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
4057+
return ChangeStatus::UNCHANGED;
4058+
40224059
const int InitModeArgNo = 1;
40234060
const int InitUseStateMachineArgNo = 2;
40244061

@@ -4165,7 +4202,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
41654202
BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
41664203

41674204
Module &M = *Kernel->getParent();
4168-
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
41694205
FunctionCallee BlockHwSizeFn =
41704206
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
41714207
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5341,7 +5377,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
53415377
BumpPtrAllocator Allocator;
53425378
CallGraphUpdater CGUpdater;
53435379

5344-
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
5380+
bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5381+
LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5382+
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels,
5383+
PostLink);
53455384

53465385
unsigned MaxFixpointIterations =
53475386
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5415,9 +5454,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
54155454
CallGraphUpdater CGUpdater;
54165455
CGUpdater.initialize(CG, C, AM, UR);
54175456

5457+
bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5458+
LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
54185459
SetVector<Function *> Functions(SCC.begin(), SCC.end());
54195460
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
5420-
/*CGSCC*/ &Functions, Kernels);
5461+
/*CGSCC*/ &Functions, Kernels, PostLink);
54215462

54225463
unsigned MaxFixpointIterations =
54235464
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;

llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
33
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
44
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
5+
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU
56
; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
7+
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX
68

79
;; void p0(void);
810
;; void p1(void);

llvm/test/Transforms/OpenMP/spmdization.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
33
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
44
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
5+
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
56
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
7+
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
68

79
;; void unknown(void);
810
;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));

0 commit comments

Comments
 (0)