@@ -188,9 +188,9 @@ struct AAICVTracker;
188
188
struct OMPInformationCache : public InformationCache {
189
189
OMPInformationCache (Module &M, AnalysisGetter &AG,
190
190
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
191
- KernelSet &Kernels)
191
+ KernelSet &Kernels, bool OpenMPPostLink )
192
192
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
193
- Kernels (Kernels) {
193
+ Kernels (Kernels), OpenMPPostLink(OpenMPPostLink) {
194
194
195
195
OMPBuilder.initialize ();
196
196
initializeRuntimeFunctions (M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
448
448
CI->setCallingConv (Fn->getCallingConv ());
449
449
}
450
450
451
+ // Helper to determine whether it is legal to create calls to the runtime
452
+ // functions in Fns; returns false only post-link if one is still undefined.
453
+ bool runtimeFnsAvailable (ArrayRef<RuntimeFunction> Fns) {
454
+ // We can always emit calls if we haven't yet linked in the runtime.
455
+ if (!OpenMPPostLink)
456
+ return true ;
457
+
458
+ // Once the runtime has already been linked in we cannot emit calls to
459
+ // any undefined functions.
460
+ for (RuntimeFunction Fn : Fns) {
461
+ RuntimeFunctionInfo &RFI = RFIs[Fn];
462
+ // A null Declaration does not block the call; only a present one that isDeclaration() does.
463
+ if (RFI.Declaration && RFI.Declaration ->isDeclaration ())
464
+ return false ;
465
+ }
466
+ return true ;
467
+ }
468
+
451
469
// / Helper to initialize all runtime function information for those defined
452
470
// / in OpenMPKinds.def.
453
471
void initializeRuntimeFunctions (Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
523
541
524
542
// / Collection of known OpenMP runtime functions.
525
543
DenseSet<const Function *> RTLFunctions;
544
+
545
+ // / Indicates if we have already linked in the OpenMP device library.
546
+ bool OpenMPPostLink = false ;
526
547
};
527
548
528
549
template <typename Ty, bool InsertInvalidates = true >
@@ -1412,7 +1433,10 @@ struct OpenMPOpt {
1412
1433
Changed |= WasSplit;
1413
1434
return WasSplit;
1414
1435
};
1415
- RFI.foreachUse (SCC, SplitMemTransfers);
1436
+ if (OMPInfoCache.runtimeFnsAvailable (
1437
+ {OMPRTL___tgt_target_data_begin_mapper_issue,
1438
+ OMPRTL___tgt_target_data_begin_mapper_wait}))
1439
+ RFI.foreachUse (SCC, SplitMemTransfers);
1416
1440
1417
1441
return Changed;
1418
1442
}
@@ -3912,6 +3936,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
3912
3936
bool changeToSPMDMode (Attributor &A, ChangeStatus &Changed) {
3913
3937
auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
3914
3938
3939
// We cannot change to SPMD mode if the runtime functions aren't available.
3940
+ if (!OMPInfoCache.runtimeFnsAvailable (
3941
+ {OMPRTL___kmpc_get_hardware_thread_id_in_block,
3942
+ OMPRTL___kmpc_barrier_simple_spmd}))
3943
+ return false ;
3944
+
3915
3945
if (!SPMDCompatibilityTracker.isAssumed ()) {
3916
3946
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
3917
3947
if (!NonCompatibleI)
@@ -4019,6 +4049,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
4019
4049
if (!ReachedKnownParallelRegions.isValidState ())
4020
4050
return ChangeStatus::UNCHANGED;
4021
4051
4052
+ auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
4053
+ if (!OMPInfoCache.runtimeFnsAvailable (
4054
+ {OMPRTL___kmpc_get_hardware_num_threads_in_block,
4055
+ OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
4056
+ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
4057
+ return ChangeStatus::UNCHANGED;
4058
+
4022
4059
const int InitModeArgNo = 1 ;
4023
4060
const int InitUseStateMachineArgNo = 2 ;
4024
4061
@@ -4165,7 +4202,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
4165
4202
BranchInst::Create (IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
4166
4203
4167
4204
Module &M = *Kernel->getParent ();
4168
- auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
4169
4205
FunctionCallee BlockHwSizeFn =
4170
4206
OMPInfoCache.OMPBuilder .getOrCreateRuntimeFunction (
4171
4207
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5341,7 +5377,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
5341
5377
BumpPtrAllocator Allocator;
5342
5378
CallGraphUpdater CGUpdater;
5343
5379
5344
- OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ nullptr , Kernels);
5380
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5381
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5382
+ OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ nullptr , Kernels,
5383
+ PostLink);
5345
5384
5346
5385
unsigned MaxFixpointIterations =
5347
5386
(isOpenMPDevice (M)) ? SetFixpointIterations : 32 ;
@@ -5415,9 +5454,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
5415
5454
CallGraphUpdater CGUpdater;
5416
5455
CGUpdater.initialize (CG, C, AM, UR);
5417
5456
5457
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5458
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5418
5459
SetVector<Function *> Functions (SCC.begin (), SCC.end ());
5419
5460
OMPInformationCache InfoCache (*(Functions.back ()->getParent ()), AG, Allocator,
5420
- /* CGSCC*/ &Functions, Kernels);
5461
+ /* CGSCC*/ &Functions, Kernels, PostLink );
5421
5462
5422
5463
unsigned MaxFixpointIterations =
5423
5464
(isOpenMPDevice (M)) ? SetFixpointIterations : 32 ;
0 commit comments