Skip to content

Commit d454c73

Browse files
OpenMP 4.5: implemented support for schedule(simd:guided) and
schedule(simd:runtime) - library part. Compiler code generation should pass the newly introduced scheduling kinds kmp_sch_guided_simd = 46 and kmp_sch_runtime_simd = 47 as parameters to the __kmpc_dispatch_init_* entry points. Differential Revision: https://reviews.llvm.org/D31602 llvm-svn: 304724
1 parent 6350de7 commit d454c73

File tree

7 files changed

+1160
-3
lines changed

7 files changed

+1160
-3
lines changed

openmp/runtime/src/kmp.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,12 @@ enum sched_type {
334334
#if OMP_45_ENABLED
335335
/* static with chunk adjustment (e.g., simd) */
336336
kmp_sch_static_balanced_chunked = 45,
337+
kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
338+
kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
337339
#endif
338340

339341
/* accessible only through KMP_SCHEDULE environment variable */
340-
kmp_sch_upper = 46, /**< upper bound for unordered values */
342+
kmp_sch_upper = 48, /**< upper bound for unordered values */
341343

342344
kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
343345
kmp_ord_static_chunked = 65,

openmp/runtime/src/kmp_dispatch.cpp

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,35 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
681681
schedule = kmp_sch_guided_iterative_chunked;
682682
KMP_WARNING(DispatchManyThreads);
683683
}
684+
if (schedule == kmp_sch_runtime_simd) {
685+
// compiler provides simd_width in the chunk parameter
686+
schedule = team->t.t_sched.r_sched_type;
687+
// Detail the schedule if needed (global controls are differentiated
688+
// appropriately)
689+
if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
690+
schedule == __kmp_static) {
691+
schedule = kmp_sch_static_balanced_chunked;
692+
} else {
693+
if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
694+
schedule = kmp_sch_guided_simd;
695+
}
696+
chunk = team->t.t_sched.chunk * chunk;
697+
}
698+
#if USE_ITT_BUILD
699+
cur_chunk = chunk;
700+
#endif
701+
#ifdef KMP_DEBUG
702+
{
703+
const char *buff;
704+
// create format specifiers before the debug output
705+
buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
706+
" chunk:%%%s\n",
707+
traits_t<ST>::spec);
708+
KD_TRACE(10, (buff, gtid, schedule, chunk));
709+
__kmp_str_free(&buff);
710+
}
711+
#endif
712+
}
684713
pr->u.p.parm1 = chunk;
685714
}
686715
KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
@@ -878,7 +907,21 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
878907
}
879908
break;
880909
} // case
881-
case kmp_sch_guided_iterative_chunked: {
910+
case kmp_sch_static_balanced_chunked: {
911+
// similar to balanced, but chunk adjusted to multiple of simd width
912+
T nth = th->th.th_team_nproc;
913+
KD_TRACE(100, ("__kmp_dispatch_init: T#%d runtime(simd:static)"
914+
" -> falling-through to static_greedy\n",
915+
gtid));
916+
schedule = kmp_sch_static_greedy;
917+
if (nth > 1)
918+
pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
919+
else
920+
pr->u.p.parm1 = tc;
921+
break;
922+
} // case
923+
case kmp_sch_guided_iterative_chunked:
924+
case kmp_sch_guided_simd: {
882925
T nproc = th->th.th_team_nproc;
883926
KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked"
884927
" case\n",
@@ -1140,6 +1183,7 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
11401183
break;
11411184
case kmp_sch_guided_iterative_chunked:
11421185
case kmp_sch_guided_analytical_chunked:
1186+
case kmp_sch_guided_simd:
11431187
schedtype = 2;
11441188
break;
11451189
default:
@@ -1991,6 +2035,89 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
19912035
} // case
19922036
break;
19932037

2038+
case kmp_sch_guided_simd: {
2039+
// same as iterative but curr-chunk adjusted to be multiple of given
2040+
// chunk
2041+
T chunk = pr->u.p.parm1;
2042+
KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_simd case\n",
2043+
gtid));
2044+
trip = pr->u.p.tc;
2045+
// Start atomic part of calculations
2046+
while (1) {
2047+
ST remaining; // signed, because can be < 0
2048+
init = sh->u.s.iteration; // shared value
2049+
remaining = trip - init;
2050+
if (remaining <= 0) { // AC: need to compare with 0 first
2051+
status = 0; // nothing to do, don't try atomic op
2052+
break;
2053+
}
2054+
KMP_DEBUG_ASSERT(init % chunk == 0);
2055+
// compare with K*nproc*(chunk+1), K=2 by default
2056+
if ((T)remaining < pr->u.p.parm2) {
2057+
// use dynamic-style shcedule
2058+
// atomically inrement iterations, get old value
2059+
init = test_then_add<ST>((ST *)&sh->u.s.iteration, (ST)chunk);
2060+
remaining = trip - init;
2061+
if (remaining <= 0) {
2062+
status = 0; // all iterations got by other threads
2063+
} else {
2064+
// got some iterations to work on
2065+
status = 1;
2066+
if ((T)remaining > chunk) {
2067+
limit = init + chunk - 1;
2068+
} else {
2069+
last = 1; // the last chunk
2070+
limit = init + remaining - 1;
2071+
} // if
2072+
} // if
2073+
break;
2074+
} // if
2075+
// divide by K*nproc
2076+
UT span = remaining * (*(double *)&pr->u.p.parm3);
2077+
UT rem = span % chunk;
2078+
if (rem) // adjust so that span%chunk == 0
2079+
span += chunk - rem;
2080+
limit = init + span;
2081+
if (compare_and_swap<ST>((ST *)&sh->u.s.iteration, (ST)init,
2082+
(ST)limit)) {
2083+
// CAS was successful, chunk obtained
2084+
status = 1;
2085+
--limit;
2086+
break;
2087+
} // if
2088+
} // while
2089+
if (status != 0) {
2090+
start = pr->u.p.lb;
2091+
incr = pr->u.p.st;
2092+
if (p_st != NULL)
2093+
*p_st = incr;
2094+
*p_lb = start + init * incr;
2095+
*p_ub = start + limit * incr;
2096+
if (pr->ordered) {
2097+
pr->u.p.ordered_lower = init;
2098+
pr->u.p.ordered_upper = limit;
2099+
#ifdef KMP_DEBUG
2100+
{
2101+
const char *buff;
2102+
// create format specifiers before the debug output
2103+
buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
2104+
"ordered_lower:%%%s ordered_upper:%%%s\n",
2105+
traits_t<UT>::spec, traits_t<UT>::spec);
2106+
KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
2107+
pr->u.p.ordered_upper));
2108+
__kmp_str_free(&buff);
2109+
}
2110+
#endif
2111+
} // if
2112+
} else {
2113+
*p_lb = 0;
2114+
*p_ub = 0;
2115+
if (p_st != NULL)
2116+
*p_st = 0;
2117+
} // if
2118+
} // case
2119+
break;
2120+
19942121
case kmp_sch_guided_analytical_chunked: {
19952122
T chunkspec = pr->u.p.parm1;
19962123
UT chunkIdx;

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2744,7 +2744,7 @@ void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
27442744
__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
27452745
kmp_sched_lower - 2];
27462746
}
2747-
if (kind == kmp_sched_auto) {
2747+
if (kind == kmp_sched_auto || chunk < 1) {
27482748
// ignore parameter chunk for schedule auto, or when chunk is less than 1
27492749
thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
27502750
} else {

0 commit comments

Comments
 (0)