@@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1560
1560
}
1561
1561
1562
1562
llvm::FunctionCallee
1563
- CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1563
+ CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1564
+ bool IsGPUDistribute) {
1564
1565
assert((IVSize == 32 || IVSize == 64) &&
1565
1566
"IV size is not compatible with the omp runtime");
1566
- StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1567
- : "__kmpc_for_static_init_4u")
1568
- : (IVSigned ? "__kmpc_for_static_init_8"
1569
- : "__kmpc_for_static_init_8u");
1567
+ StringRef Name;
1568
+ if (IsGPUDistribute)
1569
+ Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1570
+ : "__kmpc_distribute_static_init_4u")
1571
+ : (IVSigned ? "__kmpc_distribute_static_init_8"
1572
+ : "__kmpc_distribute_static_init_8u");
1573
+ else
1574
+ Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1575
+ : "__kmpc_for_static_init_4u")
1576
+ : (IVSigned ? "__kmpc_for_static_init_8"
1577
+ : "__kmpc_for_static_init_8u");
1578
+
1570
1579
llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1571
1580
auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1572
1581
llvm::Type *TypeParams[] = {
@@ -2826,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2826
2835
: OMP_IDENT_WORK_SECTIONS);
2827
2836
llvm::Value *ThreadId = getThreadID(CGF, Loc);
2828
2837
llvm::FunctionCallee StaticInitFunction =
2829
- createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2838
+ createForStaticInitFunction(Values.IVSize, Values.IVSigned, false );
2830
2839
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2831
2840
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2832
2841
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -2841,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
2841
2850
llvm::Value *UpdatedLocation =
2842
2851
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2843
2852
llvm::Value *ThreadId = getThreadID(CGF, Loc);
2844
- llvm::FunctionCallee StaticInitFunction =
2845
- createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2853
+ llvm::FunctionCallee StaticInitFunction;
2854
+ bool isGPUDistribute =
2855
+ CGM.getLangOpts().OpenMPIsDevice &&
2856
+ (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2857
+ StaticInitFunction = createForStaticInitFunction(
2858
+ Values.IVSize, Values.IVSigned, isGPUDistribute);
2859
+
2846
2860
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2847
2861
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2848
2862
OMPC_SCHEDULE_MODIFIER_unknown, Values);
@@ -2863,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863
2877
: OMP_IDENT_WORK_SECTIONS),
2864
2878
getThreadID(CGF, Loc)};
2865
2879
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2866
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2867
- CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2868
- Args);
2880
+ if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2881
+ (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2882
+ CGF.EmitRuntimeCall(
2883
+ OMPBuilder.getOrCreateRuntimeFunction(
2884
+ CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2885
+ Args);
2886
+ else
2887
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2888
+ CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2889
+ Args);
2869
2890
}
2870
2891
2871
2892
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
0 commit comments