@@ -6768,7 +6768,7 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
6768
6768
return ProxyFn;
6769
6769
}
6770
6770
static void emitTargetOutlinedFunction (
6771
- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
6771
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
6772
6772
TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
6773
6773
Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
6774
6774
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
@@ -6781,8 +6781,8 @@ static void emitTargetOutlinedFunction(
6781
6781
CBFunc, ArgAccessorFuncCB);
6782
6782
};
6783
6783
6784
- OMPBuilder.emitTargetRegionFunction (EntryInfo, GenerateOutlinedFunction, true ,
6785
- OutlinedFn, OutlinedFnID);
6784
+ OMPBuilder.emitTargetRegionFunction (EntryInfo, GenerateOutlinedFunction,
6785
+ IsOffloadEntry, OutlinedFn, OutlinedFnID);
6786
6786
}
6787
6787
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask (
6788
6788
Function *OutlinedFn, Value *OutlinedFnID,
@@ -6898,15 +6898,22 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
6898
6898
6899
6899
Builder.restoreIP (TargetTaskBodyIP);
6900
6900
6901
- // emitKernelLaunch makes the necessary runtime call to offload the kernel.
6902
- // We then outline all that code into a separate function
6903
- // ('kernel_launch_function' in the pseudo code above). This function is then
6904
- // called by the target task proxy function (see
6905
- // '@.omp_target_task_proxy_func' in the pseudo code above)
6906
- // "@.omp_target_task_proxy_func' is generated by emitTargetTaskProxyFunction
6907
- Builder.restoreIP (emitKernelLaunch (Builder, OutlinedFn, OutlinedFnID,
6908
- EmitTargetCallFallbackCB, Args, DeviceID,
6909
- RTLoc, TargetTaskAllocaIP));
6901
+ if (OutlinedFnID) {
6902
+ // emitKernelLaunch makes the necessary runtime call to offload the kernel.
6903
+ // We then outline all that code into a separate function
6904
+ // ('kernel_launch_function' in the pseudo code above). This function is
6905
+ // then called by the target task proxy function (see
6906
+ // '@.omp_target_task_proxy_func' in the pseudo code above)
6907
+ // '@.omp_target_task_proxy_func' is generated by
6908
+ // emitTargetTaskProxyFunction.
6909
+ Builder.restoreIP (emitKernelLaunch (Builder, OutlinedFn, OutlinedFnID,
6910
+ EmitTargetCallFallbackCB, Args, DeviceID,
6911
+ RTLoc, TargetTaskAllocaIP));
6912
+ } else {
6913
+ // When OutlinedFnID is set to nullptr, then it's not an offloading call. In
6914
+ // this case, we execute the host implementation directly.
6915
+ Builder.restoreIP (EmitTargetCallFallbackCB (Builder.saveIP ()));
6916
+ }
6910
6917
6911
6918
OI.ExitBB = Builder.saveIP ().getBlock ();
6912
6919
OI.PostOutlineCB = [this , ToBeDeleted, Dependencies,
@@ -7015,11 +7022,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
7015
7022
Function *TaskCompleteFn =
7016
7023
getOrCreateRuntimeFunctionPtr (OMPRTL___kmpc_omp_task_complete_if0);
7017
7024
Builder.CreateCall (TaskBeginFn, {Ident, ThreadID, TaskData});
7018
- CallInst *CI = nullptr ;
7019
- if (HasShareds)
7020
- CI = Builder.CreateCall (ProxyFn, {ThreadID, TaskData});
7021
- else
7022
- CI = Builder.CreateCall (ProxyFn, {ThreadID});
7025
+ CallInst *CI = Builder.CreateCall (ProxyFn, {ThreadID, TaskData});
7023
7026
CI->setDebugLoc (StaleCI->getDebugLoc ());
7024
7027
Builder.CreateCall (TaskCompleteFn, {Ident, ThreadID, TaskData});
7025
7028
} else if (DepArray) {
@@ -7052,6 +7055,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
7052
7055
<< " \n " );
7053
7056
return Builder.saveIP ();
7054
7057
}
7058
+
7055
7059
void OpenMPIRBuilder::emitOffloadingArraysAndArgs (
7056
7060
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
7057
7061
TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
@@ -7069,6 +7073,37 @@ static void emitTargetCall(
7069
7073
SmallVectorImpl<Value *> &Args,
7070
7074
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
7071
7075
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
7076
+ // Generate a function call to the host fallback implementation of the target
7077
+ // region. This is called by the host when no offload entry was generated for
7078
+ // the target region and when the offloading call fails at runtime.
7079
+ auto &&EmitTargetCallFallbackCB =
7080
+ [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
7081
+ Builder.restoreIP (IP);
7082
+ Builder.CreateCall (OutlinedFn, Args);
7083
+ return Builder.saveIP ();
7084
+ };
7085
+
7086
+ bool HasNoWait = false ;
7087
+ bool HasDependencies = Dependencies.size () > 0 ;
7088
+ bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7089
+
7090
+ // If we don't have an ID for the target region, it means an offload entry
7091
+ // wasn't created. In this case we just run the host fallback directly.
7092
+ if (!OutlinedFnID) {
7093
+ if (RequiresOuterTargetTask) {
7094
+ // Arguments that are intended to be directly forwarded to an
7095
+ // emitKernelLaunch call are passed as nullptr, since OutlinedFnID=nullptr
7096
+ // results in that call not being done.
7097
+ OpenMPIRBuilder::TargetKernelArgs KArgs;
7098
+ Builder.restoreIP (OMPBuilder.emitTargetTask (
7099
+ OutlinedFn, /* OutlinedFnID=*/ nullptr , EmitTargetCallFallbackCB, KArgs,
7100
+ /* DeviceID=*/ nullptr , /* RTLoc=*/ nullptr , AllocaIP, Dependencies,
7101
+ HasNoWait));
7102
+ } else {
7103
+ Builder.restoreIP (EmitTargetCallFallbackCB (Builder.saveIP ()));
7104
+ }
7105
+ return ;
7106
+ }
7072
7107
7073
7108
OpenMPIRBuilder::TargetDataInfo Info (
7074
7109
/* RequiresDevicePointerInfo=*/ false ,
@@ -7081,14 +7116,6 @@ static void emitTargetCall(
7081
7116
/* IsNonContiguous=*/ true ,
7082
7117
/* ForEndCall=*/ false );
7083
7118
7084
- // emitKernelLaunch
7085
- auto &&EmitTargetCallFallbackCB =
7086
- [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
7087
- Builder.restoreIP (IP);
7088
- Builder.CreateCall (OutlinedFn, Args);
7089
- return Builder.saveIP ();
7090
- };
7091
-
7092
7119
unsigned NumTargetItems = Info.NumberOfPtrs ;
7093
7120
// TODO: Use correct device ID
7094
7121
Value *DeviceID = Builder.getInt64 (OMP_DEVICEID_UNDEF);
@@ -7103,10 +7130,6 @@ static void emitTargetCall(
7103
7130
// TODO: Use correct DynCGGroupMem
7104
7131
Value *DynCGGroupMem = Builder.getInt32 (0 );
7105
7132
7106
- bool HasNoWait = false ;
7107
- bool HasDependencies = Dependencies.size () > 0 ;
7108
- bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7109
-
7110
7133
OpenMPIRBuilder::TargetKernelArgs KArgs (NumTargetItems, RTArgs, NumIterations,
7111
7134
NumTeamsVal, NumThreadsVal,
7112
7135
DynCGGroupMem, HasNoWait);
@@ -7123,8 +7146,9 @@ static void emitTargetCall(
7123
7146
DeviceID, RTLoc, AllocaIP));
7124
7147
}
7125
7148
}
7149
+
7126
7150
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget (
7127
- const LocationDescription &Loc, InsertPointTy AllocaIP,
7151
+ const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
7128
7152
InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
7129
7153
int32_t NumThreads, SmallVectorImpl<Value *> &Args,
7130
7154
GenMapInfoCallbackTy GenMapInfoCB,
@@ -7138,12 +7162,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
7138
7162
Builder.restoreIP (CodeGenIP);
7139
7163
7140
7164
Function *OutlinedFn;
7141
- Constant *OutlinedFnID;
7165
+ Constant *OutlinedFnID = nullptr ;
7142
7166
// The target region is outlined into its own function. The LLVM IR for
7143
7167
// the target region itself is generated using the callbacks CBFunc
7144
7168
// and ArgAccessorFuncCB
7145
- emitTargetOutlinedFunction (*this , Builder, EntryInfo, OutlinedFn,
7146
- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
7169
+ emitTargetOutlinedFunction (*this , Builder, IsOffloadEntry, EntryInfo,
7170
+ OutlinedFn, OutlinedFnID, Args, CBFunc,
7171
+ ArgAccessorFuncCB);
7147
7172
7148
7173
// If we are not on the target device, then we need to generate code
7149
7174
// to make a remote call (offload) to the previously outlined function
0 commit comments