
Commit 1724813

Peter Zijlstra authored and Ingo Molnar committed
sched/deadline: Remove the sysctl_sched_dl knobs
Remove the deadline-specific sysctls for now. The problem with them is that the interaction with the existing rt knobs is nearly impossible to get right.

The current situation (as of before this patch) is that the rt and dl bandwidth are completely separate and we enforce rt + dl < 100%. This is undesirable because the rt default of 95% leaves us hardly any room, even though dl tasks are safer than rt tasks.

Another proposed solution (a discarded patch) was to have the dl bandwidth be a fraction of the rt bandwidth. This is highly confusing IMO.

Furthermore, neither proposal is consistent with the situation we actually want, which is rt tasks run from a dl server; in that case the rt bandwidth is a direct subset of dl.

So whichever way we go, the introduction of dl controls at this point is painful. Therefore remove them and instead share the rt budget.

This means that, for now, the rt knobs are used for dl admission control and the dl runtime is accounted against the rt runtime. I realise that this isn't entirely desirable either; but whatever we do, we appear to need to change the interface later, so better to have a small interface for now.

Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
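To make the arithmetic behind "share the rt budget" concrete, here is a standalone sketch (not kernel code) of the admission check this approach relies on: the rt knobs express a CPU fraction as runtime/period, and a new -deadline reservation is admitted only while the accumulated deadline bandwidth stays within that fraction. The BW_UNIT fixed-point scale and the helper below merely mirror the kernel's to_ratio()/dl_bw bookkeeping and are illustrative, not the actual implementation.

#include <stdio.h>
#include <stdint.h>

#define BW_UNIT (1ULL << 20)    /* 100% of the CPU in fixed point */

/* Fraction of a period consumed by runtime, scaled to BW_UNIT. */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
        return runtime * BW_UNIT / period;
}

int main(void)
{
        /* Defaults: sched_rt_period_us = 1000000, sched_rt_runtime_us = 950000 */
        uint64_t rt_bw = to_ratio(1000000, 950000);

        /* A candidate -deadline task asking for 30ms every 100ms. */
        uint64_t task_bw = to_ratio(100000, 30000);
        uint64_t total_bw = 0;  /* bandwidth already reserved on this root domain */

        if (total_bw + task_bw <= rt_bw)
                printf("admit: %.1f%% used of the %.1f%% budget\n",
                       100.0 * (total_bw + task_bw) / BW_UNIT,
                       100.0 * rt_bw / BW_UNIT);
        else
                printf("reject: over budget\n");
        return 0;
}

With the default knobs this admits the 30% reservation; lowering sched_rt_runtime_us now shrinks the dl budget as well, since both classes draw from the same pool.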
1 parent e4099a5 commit 1724813

File tree

5 files changed (+97, -234 lines)


include/linux/sched/sysctl.h

Lines changed: 0 additions & 13 deletions
@@ -81,15 +81,6 @@ static inline unsigned int get_sysctl_timer_migration(void)
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
-/*
- * control SCHED_DEADLINE reservations:
- *
- *  /proc/sys/kernel/sched_dl_period_us
- *  /proc/sys/kernel/sched_dl_runtime_us
- */
-extern unsigned int sysctl_sched_dl_period;
-extern int sysctl_sched_dl_runtime;
-
 #ifdef CONFIG_CFS_BANDWIDTH
 extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 #endif
@@ -108,8 +99,4 @@ extern int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
-int sched_dl_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
 #endif /* _SCHED_SYSCTL_H */

kernel/sched/core.c

Lines changed: 71 additions & 188 deletions
@@ -6771,7 +6771,7 @@ void __init sched_init(void)
 	init_rt_bandwidth(&def_rt_bandwidth,
 			global_rt_period(), global_rt_runtime());
 	init_dl_bandwidth(&def_dl_bandwidth,
-			global_dl_period(), global_dl_runtime());
+			global_rt_period(), global_rt_runtime());
 
 #ifdef CONFIG_SMP
 	init_defrootdomain();
@@ -7354,64 +7354,11 @@ static long sched_group_rt_period(struct task_group *tg)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -rt and -deadline bandwidth.
- *
- * Here we check if the new -rt bandwidth value is consistent
- * with the system settings for the bandwidth available
- * to -deadline tasks.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_rt_dl_global_constraints(u64 rt_bw)
-{
-	unsigned long flags;
-	u64 dl_bw;
-	bool ret;
-
-	raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, flags);
-	if (global_rt_runtime() == RUNTIME_INF ||
-	    global_dl_runtime() == RUNTIME_INF) {
-		ret = true;
-		goto unlock;
-	}
-
-	dl_bw = to_ratio(def_dl_bandwidth.dl_period,
-			 def_dl_bandwidth.dl_runtime);
-
-	ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-	raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, flags);
-
-	return ret;
-}
-
 #ifdef CONFIG_RT_GROUP_SCHED
 static int sched_rt_global_constraints(void)
 {
-	u64 runtime, period, bw;
 	int ret = 0;
 
-	if (sysctl_sched_rt_period <= 0)
-		return -EINVAL;
-
-	runtime = global_rt_runtime();
-	period = global_rt_period();
-
-	/*
-	 * Sanity check on the sysctl variables.
-	 */
-	if (runtime > period && runtime != RUNTIME_INF)
-		return -EINVAL;
-
-	bw = to_ratio(period, runtime);
-	if (!__sched_rt_dl_global_constraints(bw))
-		return -EINVAL;
-
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
 	ret = __rt_schedulable(NULL, 0, 0);
@@ -7435,88 +7382,27 @@ static int sched_rt_global_constraints(void)
 {
 	unsigned long flags;
 	int i, ret = 0;
-	u64 bw;
-
-	if (sysctl_sched_rt_period <= 0)
-		return -EINVAL;
 
 	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
-	bw = to_ratio(global_rt_period(), global_rt_runtime());
-	if (!__sched_rt_dl_global_constraints(bw)) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
 
 		raw_spin_lock(&rt_rq->rt_runtime_lock);
 		rt_rq->rt_runtime = global_rt_runtime();
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 	}
-unlock:
 	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 
 	return ret;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-/*
- * Coupling of -dl and -rt bandwidth.
- *
- * Here we check, while setting the system wide bandwidth available
- * for -dl tasks and groups, if the new values are consistent with
- * the system settings for the bandwidth available to -rt entities.
- *
- * IOW, we want to enforce that
- *
- *   rt_bandwidth + dl_bandwidth <= 100%
- *
- * is always true.
- */
-static bool __sched_dl_rt_global_constraints(u64 dl_bw)
-{
-	u64 rt_bw;
-	bool ret;
-
-	raw_spin_lock(&def_rt_bandwidth.rt_runtime_lock);
-	if (global_dl_runtime() == RUNTIME_INF ||
-	    global_rt_runtime() == RUNTIME_INF) {
-		ret = true;
-		goto unlock;
-	}
-
-	rt_bw = to_ratio(ktime_to_ns(def_rt_bandwidth.rt_period),
-			 def_rt_bandwidth.rt_runtime);
-
-	ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
-unlock:
-	raw_spin_unlock(&def_rt_bandwidth.rt_runtime_lock);
-
-	return ret;
-}
-
-static bool __sched_dl_global_constraints(u64 runtime, u64 period)
-{
-	if (!period || (runtime != RUNTIME_INF && runtime > period))
-		return -EINVAL;
-
-	return 0;
-}
-
 static int sched_dl_global_constraints(void)
 {
-	u64 runtime = global_dl_runtime();
-	u64 period = global_dl_period();
+	u64 runtime = global_rt_runtime();
+	u64 period = global_rt_period();
 	u64 new_bw = to_ratio(period, runtime);
-	int ret, i;
-
-	ret = __sched_dl_global_constraints(runtime, period);
-	if (ret)
-		return ret;
-
-	if (!__sched_dl_rt_global_constraints(new_bw))
-		return -EINVAL;
+	int cpu, ret = 0;
 
 	/*
 	 * Here we want to check the bandwidth not being set to some
@@ -7527,46 +7413,68 @@ static int sched_dl_global_constraints(void)
 	 * cycling on root_domains... Discussion on different/better
 	 * solutions is welcome!
 	 */
-	for_each_possible_cpu(i) {
-		struct dl_bw *dl_b = dl_bw_of(i);
+	for_each_possible_cpu(cpu) {
+		struct dl_bw *dl_b = dl_bw_of(cpu);
 
 		raw_spin_lock(&dl_b->lock);
-		if (new_bw < dl_b->total_bw) {
-			raw_spin_unlock(&dl_b->lock);
-			return -EBUSY;
-		}
+		if (new_bw < dl_b->total_bw)
+			ret = -EBUSY;
 		raw_spin_unlock(&dl_b->lock);
+
+		if (ret)
+			break;
 	}
 
-	return 0;
+	return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+static void sched_dl_do_global(void)
 {
-	int ret;
-	static DEFINE_MUTEX(mutex);
+	u64 new_bw = -1;
+	int cpu;
 
-	mutex_lock(&mutex);
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-	/* make sure that internally we keep jiffies */
-	/* also, writing zero resets timeslice to default */
-	if (!ret && write) {
-		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
-			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	def_dl_bandwidth.dl_period = global_rt_period();
+	def_dl_bandwidth.dl_runtime = global_rt_runtime();
+
+	if (global_rt_runtime() != RUNTIME_INF)
+		new_bw = to_ratio(global_rt_period(), global_rt_runtime());
+
+	/*
+	 * FIXME: As above...
+	 */
+	for_each_possible_cpu(cpu) {
+		struct dl_bw *dl_b = dl_bw_of(cpu);
+
+		raw_spin_lock(&dl_b->lock);
+		dl_b->bw = new_bw;
+		raw_spin_unlock(&dl_b->lock);
 	}
-	mutex_unlock(&mutex);
-	return ret;
+}
+
+static int sched_rt_global_validate(void)
+{
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
+	if (sysctl_sched_rt_runtime > sysctl_sched_rt_period)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void sched_rt_do_global(void)
+{
+	def_rt_bandwidth.rt_runtime = global_rt_runtime();
+	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
 }
 
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
-	int ret;
 	int old_period, old_runtime;
 	static DEFINE_MUTEX(mutex);
+	int ret;
 
 	mutex_lock(&mutex);
 	old_period = sysctl_sched_rt_period;
@@ -7575,72 +7483,47 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (!ret && write) {
+		ret = sched_rt_global_validate();
+		if (ret)
+			goto undo;
+
 		ret = sched_rt_global_constraints();
-		if (ret) {
-			sysctl_sched_rt_period = old_period;
-			sysctl_sched_rt_runtime = old_runtime;
-		} else {
-			def_rt_bandwidth.rt_runtime = global_rt_runtime();
-			def_rt_bandwidth.rt_period =
-				ns_to_ktime(global_rt_period());
-		}
+		if (ret)
+			goto undo;
+
+		ret = sched_dl_global_constraints();
+		if (ret)
+			goto undo;
+
+		sched_rt_do_global();
+		sched_dl_do_global();
+	}
+	if (0) {
+undo:
+		sysctl_sched_rt_period = old_period;
+		sysctl_sched_rt_runtime = old_runtime;
 	}
 	mutex_unlock(&mutex);
 
 	return ret;
 }
 
-int sched_dl_handler(struct ctl_table *table, int write,
+int sched_rr_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int ret;
-	int old_period, old_runtime;
 	static DEFINE_MUTEX(mutex);
-	unsigned long flags;
 
 	mutex_lock(&mutex);
-	old_period = sysctl_sched_dl_period;
-	old_runtime = sysctl_sched_dl_runtime;
-
 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
 	if (!ret && write) {
-		raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock,
-				      flags);
-
-		ret = sched_dl_global_constraints();
-		if (ret) {
-			sysctl_sched_dl_period = old_period;
-			sysctl_sched_dl_runtime = old_runtime;
-		} else {
-			u64 new_bw;
-			int i;
-
-			def_dl_bandwidth.dl_period = global_dl_period();
-			def_dl_bandwidth.dl_runtime = global_dl_runtime();
-			if (global_dl_runtime() == RUNTIME_INF)
-				new_bw = -1;
-			else
-				new_bw = to_ratio(global_dl_period(),
-						  global_dl_runtime());
-			/*
-			 * FIXME: As above...
-			 */
-			for_each_possible_cpu(i) {
-				struct dl_bw *dl_b = dl_bw_of(i);
-
-				raw_spin_lock(&dl_b->lock);
-				dl_b->bw = new_bw;
-				raw_spin_unlock(&dl_b->lock);
-			}
-		}
-
-		raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock,
-					   flags);
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
 	}
 	mutex_unlock(&mutex);
-
 	return ret;
 }
 
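A side note on the rewritten sched_rt_handler() above: all failure paths now funnel into a single rollback via the "if (0) { undo: ... }" construct. The block is unreachable on the success path, but the undo: label inside it is still a valid goto target, so each failing step can jump to one shared restore of the old sysctl values without nested else branches. A minimal standalone sketch of the idiom (illustration only, with made-up step()/do_update() helpers, not kernel code):

#include <stdio.h>

static int step(int n)
{
        return n == 2 ? -1 : 0; /* pretend step 2 fails */
}

int do_update(void)
{
        int saved = 42;         /* old value to restore on failure */
        int ret;

        ret = step(1);
        if (ret)
                goto undo;

        ret = step(2);
        if (ret)
                goto undo;

        if (0) {                /* never entered on success */
undo:
                printf("rolling back to %d\n", saved);
        }
        return ret;
}

int main(void)
{
        return do_update() ? 1 : 0;
}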