Skip to content

Commit b4a5543

Browse files
committed
[OpenMP] Introduce a new worksharing RTL function for distribute
This patch adds a new RTL function for worksharing. Currently we use `__kmpc_for_static_init` for both the `distribute` and `parallel` portions of the loop clause. This patch replaces the call used for the `distribute` portion with a new runtime call, `__kmpc_distribute_static_init`. For now the new call is used in exactly the same way, but it will make it easier in the future to fine-tune the distribute and parallel portions of the loop. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D110429
1 parent be2a421 commit b4a5543

10 files changed

+618
-564
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
15601560
}
15611561

15621562
llvm::FunctionCallee
1563-
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1563+
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564+
bool IsGPUDistribute) {
15641565
assert((IVSize == 32 || IVSize == 64) &&
15651566
"IV size is not compatible with the omp runtime");
1566-
StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567-
: "__kmpc_for_static_init_4u")
1568-
: (IVSigned ? "__kmpc_for_static_init_8"
1569-
: "__kmpc_for_static_init_8u");
1567+
StringRef Name;
1568+
if (IsGPUDistribute)
1569+
Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570+
: "__kmpc_distribute_static_init_4u")
1571+
: (IVSigned ? "__kmpc_distribute_static_init_8"
1572+
: "__kmpc_distribute_static_init_8u");
1573+
else
1574+
Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575+
: "__kmpc_for_static_init_4u")
1576+
: (IVSigned ? "__kmpc_for_static_init_8"
1577+
: "__kmpc_for_static_init_8u");
1578+
15701579
llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
15711580
auto *PtrTy = llvm::PointerType::getUnqual(ITy);
15721581
llvm::Type *TypeParams[] = {
@@ -2826,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
28262835
: OMP_IDENT_WORK_SECTIONS);
28272836
llvm::Value *ThreadId = getThreadID(CGF, Loc);
28282837
llvm::FunctionCallee StaticInitFunction =
2829-
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2838+
createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
28302839
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
28312840
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
28322841
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -2841,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
28412850
llvm::Value *UpdatedLocation =
28422851
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
28432852
llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844-
llvm::FunctionCallee StaticInitFunction =
2845-
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853+
llvm::FunctionCallee StaticInitFunction;
2854+
bool isGPUDistribute =
2855+
CGM.getLangOpts().OpenMPIsDevice &&
2856+
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857+
StaticInitFunction = createForStaticInitFunction(
2858+
Values.IVSize, Values.IVSigned, isGPUDistribute);
2859+
28462860
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
28472861
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
28482862
OMPC_SCHEDULE_MODIFIER_unknown, Values);
@@ -2863,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
28632877
: OMP_IDENT_WORK_SECTIONS),
28642878
getThreadID(CGF, Loc)};
28652879
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866-
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867-
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868-
Args);
2880+
if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881+
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882+
CGF.EmitRuntimeCall(
2883+
OMPBuilder.getOrCreateRuntimeFunction(
2884+
CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885+
Args);
2886+
else
2887+
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888+
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889+
Args);
28692890
}
28702891

28712892
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,9 +795,11 @@ class CGOpenMPRuntime {
795795
llvm::Type *getKmpc_MicroPointerTy();
796796

797797
/// Returns __kmpc_for_static_init_* runtime function for the specified
798-
/// size \a IVSize and sign \a IVSigned.
798+
/// size \a IVSize and sign \a IVSigned. Will create a distribute call
799+
/// __kmpc_distribute_static_init_* if \a IsGPUDistribute is set.
799800
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
800-
bool IVSigned);
801+
bool IVSigned,
802+
bool IsGPUDistribute);
801803

802804
/// Returns __kmpc_dispatch_init_* runtime function for the specified
803805
/// size \a IVSize and sign \a IVSigned.

0 commit comments

Comments
 (0)