Skip to content

Commit cdacec7

Browse files
committed
Fix dynamic schedule tracking
1 parent 592abf2 commit cdacec7

26 files changed

+2798
-2117
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,6 +2553,15 @@ void CGOpenMPRuntime::emitForDispatchInit(
25532553
Args);
25542554
}
25552555

2556+
void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2557+
SourceLocation Loc) {
2558+
if (!CGF.HaveInsertPoint())
2559+
return;
2560+
// Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2561+
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2562+
CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2563+
}
2564+
25562565
static void emitForStaticInitCall(
25572566
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
25582567
llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
@@ -11996,6 +12005,11 @@ void CGOpenMPSIMDRuntime::emitForDispatchInit(
1199612005
llvm_unreachable("Not supported in SIMD-only mode");
1199712006
}
1199812007

12008+
void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12009+
SourceLocation Loc) {
12010+
llvm_unreachable("Not supported in SIMD-only mode");
12011+
}
12012+
1199912013
void CGOpenMPSIMDRuntime::emitForStaticInit(
1200012014
CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
1200112015
const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,14 @@ class CGOpenMPRuntime {
946946
unsigned IVSize, bool IVSigned, bool Ordered,
947947
const DispatchRTInput &DispatchValues);
948948

949+
/// This is used for non-static scheduled types and when the ordered
950+
/// clause is present on the loop construct.
951+
///
952+
/// \param CGF Reference to current CodeGenFunction.
953+
/// \param Loc Clang source location.
954+
///
955+
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc);
956+
949957
/// Struct with the values to be passed to the static runtime function
950958
struct StaticRTInput {
951959
/// Size of the iteration variable in bits.
@@ -1829,6 +1837,14 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
18291837
unsigned IVSize, bool IVSigned, bool Ordered,
18301838
const DispatchRTInput &DispatchValues) override;
18311839

1840+
/// This is used for non-static scheduled types and when the ordered
1841+
/// clause is present on the loop construct.
1842+
///
1843+
/// \param CGF Reference to current CodeGenFunction.
1844+
/// \param Loc Clang source location.
1845+
///
1846+
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override;
1847+
18321848
/// Call the appropriate runtime routine to initialize it before start
18331849
/// of loop.
18341850
///

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2985,12 +2985,14 @@ void CodeGenFunction::EmitOMPForOuterLoop(
29852985
// run-sched-var ICV. If the ICV is set to auto, the schedule is
29862986
// implementation defined
29872987
//
2988+
// __kmpc_dispatch_init();
29882989
// while(__kmpc_dispatch_next(&LB, &UB)) {
29892990
// idx = LB;
29902991
// while (idx <= UB) { BODY; ++idx;
29912992
// __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
29922993
// } // inner loop
29932994
// }
2995+
// __kmpc_dispatch_deinit();
29942996
//
29952997
// OpenMP [2.7.1, Loop Construct, Description, table 2-1]
29962998
// When schedule(static, chunk_size) is specified, iterations are divided into
@@ -3044,6 +3046,9 @@ void CodeGenFunction::EmitOMPForOuterLoop(
30443046
OuterLoopArgs.DKind = LoopArgs.DKind;
30453047
EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
30463048
emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3049+
if (DynamicOrOrdered) {
3050+
RT.emitForDispatchDeinit(*this, S.getBeginLoc());
3051+
}
30473052
}
30483053

30493054
static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,

clang/test/OpenMP/distribute_parallel_for_codegen.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2283,6 +2283,9 @@ int main() {
22832283
// CHECK1: omp.dispatch.inc:
22842284
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
22852285
// CHECK1: omp.dispatch.end:
2286+
// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2287+
// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
2288+
// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
22862289
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
22872290
// CHECK1: omp.precond.end:
22882291
// CHECK1-NEXT: ret void
@@ -2533,6 +2536,9 @@ int main() {
25332536
// CHECK1: omp.dispatch.inc:
25342537
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
25352538
// CHECK1: omp.dispatch.end:
2539+
// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
2540+
// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
2541+
// CHECK1-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
25362542
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
25372543
// CHECK1: omp.precond.end:
25382544
// CHECK1-NEXT: ret void
@@ -4010,6 +4016,9 @@ int main() {
40104016
// CHECK3: omp.dispatch.inc:
40114017
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
40124018
// CHECK3: omp.dispatch.end:
4019+
// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
4020+
// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
4021+
// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP35]])
40134022
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
40144023
// CHECK3: omp.precond.end:
40154024
// CHECK3-NEXT: ret void
@@ -4253,6 +4262,9 @@ int main() {
42534262
// CHECK3: omp.dispatch.inc:
42544263
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
42554264
// CHECK3: omp.dispatch.end:
4265+
// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
4266+
// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
4267+
// CHECK3-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP36]])
42564268
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
42574269
// CHECK3: omp.precond.end:
42584270
// CHECK3-NEXT: ret void
@@ -6314,6 +6326,9 @@ int main() {
63146326
// CHECK9: omp.dispatch.inc:
63156327
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
63166328
// CHECK9: omp.dispatch.end:
6329+
// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
6330+
// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
6331+
// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
63176332
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
63186333
// CHECK9: omp.precond.end:
63196334
// CHECK9-NEXT: ret void
@@ -6554,6 +6569,9 @@ int main() {
65546569
// CHECK9: omp.dispatch.inc:
65556570
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
65566571
// CHECK9: omp.dispatch.end:
6572+
// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
6573+
// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
6574+
// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
65576575
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
65586576
// CHECK9: omp.precond.end:
65596577
// CHECK9-NEXT: ret void
@@ -8627,6 +8645,9 @@ int main() {
86278645
// CHECK9: omp.dispatch.inc:
86288646
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
86298647
// CHECK9: omp.dispatch.end:
8648+
// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
8649+
// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
8650+
// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
86308651
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
86318652
// CHECK9: omp.precond.end:
86328653
// CHECK9-NEXT: ret void
@@ -8867,6 +8888,9 @@ int main() {
88678888
// CHECK9: omp.dispatch.inc:
88688889
// CHECK9-NEXT: br label [[OMP_DISPATCH_COND]]
88698890
// CHECK9: omp.dispatch.end:
8891+
// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
8892+
// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
8893+
// CHECK9-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
88708894
// CHECK9-NEXT: br label [[OMP_PRECOND_END]]
88718895
// CHECK9: omp.precond.end:
88728896
// CHECK9-NEXT: ret void
@@ -10884,6 +10908,9 @@ int main() {
1088410908
// CHECK11: omp.dispatch.inc:
1088510909
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
1088610910
// CHECK11: omp.dispatch.end:
10911+
// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
10912+
// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
10913+
// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
1088710914
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
1088810915
// CHECK11: omp.precond.end:
1088910916
// CHECK11-NEXT: ret void
@@ -11117,6 +11144,9 @@ int main() {
1111711144
// CHECK11: omp.dispatch.inc:
1111811145
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
1111911146
// CHECK11: omp.dispatch.end:
11147+
// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
11148+
// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
11149+
// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
1112011150
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
1112111151
// CHECK11: omp.precond.end:
1112211152
// CHECK11-NEXT: ret void
@@ -13146,6 +13176,9 @@ int main() {
1314613176
// CHECK11: omp.dispatch.inc:
1314713177
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
1314813178
// CHECK11: omp.dispatch.end:
13179+
// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
13180+
// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
13181+
// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP31]])
1314913182
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
1315013183
// CHECK11: omp.precond.end:
1315113184
// CHECK11-NEXT: ret void
@@ -13379,6 +13412,9 @@ int main() {
1337913412
// CHECK11: omp.dispatch.inc:
1338013413
// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]]
1338113414
// CHECK11: omp.dispatch.end:
13415+
// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
13416+
// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
13417+
// CHECK11-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB3]], i32 [[TMP32]])
1338213418
// CHECK11-NEXT: br label [[OMP_PRECOND_END]]
1338313419
// CHECK11: omp.precond.end:
1338413420
// CHECK11-NEXT: ret void

0 commit comments

Comments
 (0)