Skip to content

Commit 6bfd6d7

Browse files
jlelli authored and Ingo Molnar committed
sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Data from tests confirmed that the original active load balancing logic did not scale in either the number of CPUs or the number of tasks (as sched_rt does). Here we provide a global data structure to keep track of the deadlines of the running tasks in the system. The structure is composed of a bitmask showing the free CPUs and a max-heap, which is needed when the system is heavily loaded. The implementation and concurrent access scheme are kept simple by design. However, our measurements show that we can compete with sched_rt on large multi-CPU machines [1]. Only the push path is addressed; the extension to use this structure for pull decisions as well is straightforward. However, we are currently evaluating different data structures (in order to decrease/avoid contention) that could possibly solve both problems. We are also going to re-run tests considering recent changes inside cpupri [2]. [1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf [2] http://www.spinics.net/lists/linux-rt-users/msg06778.html Signed-off-by: Juri Lelli <[email protected]> Signed-off-by: Peter Zijlstra <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Ingo Molnar <[email protected]>
1 parent 332ac17 commit 6bfd6d7

File tree

6 files changed

+269
-40
lines changed

6 files changed

+269
-40
lines changed

kernel/sched/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ endif
1414
obj-y += core.o proc.o clock.o cputime.o
1515
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
1616
obj-y += wait.o completion.o
17-
obj-$(CONFIG_SMP) += cpupri.o
17+
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
1818
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
1919
obj-$(CONFIG_SCHEDSTATS) += stats.o
2020
obj-$(CONFIG_SCHED_DEBUG) += debug.o

kernel/sched/core.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5287,6 +5287,7 @@ static void free_rootdomain(struct rcu_head *rcu)
52875287
struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
52885288

52895289
cpupri_cleanup(&rd->cpupri);
5290+
cpudl_cleanup(&rd->cpudl);
52905291
free_cpumask_var(rd->dlo_mask);
52915292
free_cpumask_var(rd->rto_mask);
52925293
free_cpumask_var(rd->online);
@@ -5345,6 +5346,8 @@ static int init_rootdomain(struct root_domain *rd)
53455346
goto free_dlo_mask;
53465347

53475348
init_dl_bw(&rd->dl_bw);
5349+
if (cpudl_init(&rd->cpudl) != 0)
5350+
goto free_dlo_mask;
53485351

53495352
if (cpupri_init(&rd->cpupri) != 0)
53505353
goto free_rto_mask;

kernel/sched/cpudeadline.c

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/*
2+
* kernel/sched/cpudeadline.c
3+
*
4+
* Global CPU deadline management
5+
*
6+
* Author: Juri Lelli <[email protected]>
7+
*
8+
* This program is free software; you can redistribute it and/or
9+
* modify it under the terms of the GNU General Public License
10+
* as published by the Free Software Foundation; version 2
11+
* of the License.
12+
*/
13+
14+
#include <linux/gfp.h>
15+
#include <linux/kernel.h>
16+
#include "cpudeadline.h"
17+
18+
/* Index of the slot directly above slot @i in the array-backed heap. */
static inline int parent(int i)
{
	int above = i - 1;

	return above >> 1;
}
22+
23+
/* Index of the left child of slot @i in the array-backed heap. */
static inline int left_child(int i)
{
	return 2 * i + 1;
}
27+
28+
/* Index of the right child of slot @i in the array-backed heap. */
static inline int right_child(int i)
{
	return 2 * i + 2;
}
32+
33+
/*
 * Return non-zero when deadline @a is earlier than deadline @b.
 *
 * The difference is interpreted as a signed 64-bit value rather than
 * comparing the raw unsigned values directly.
 */
static inline int dl_time_before(u64 a, u64 b)
{
	s64 delta = (s64)(a - b);

	return delta < 0;
}
37+
38+
void cpudl_exchange(struct cpudl *cp, int a, int b)
39+
{
40+
int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
41+
42+
swap(cp->elements[a], cp->elements[b]);
43+
swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
44+
}
45+
46+
void cpudl_heapify(struct cpudl *cp, int idx)
47+
{
48+
int l, r, largest;
49+
50+
/* adapted from lib/prio_heap.c */
51+
while(1) {
52+
l = left_child(idx);
53+
r = right_child(idx);
54+
largest = idx;
55+
56+
if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
57+
cp->elements[l].dl))
58+
largest = l;
59+
if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
60+
cp->elements[r].dl))
61+
largest = r;
62+
if (largest == idx)
63+
break;
64+
65+
/* Push idx down the heap one level and bump one up */
66+
cpudl_exchange(cp, largest, idx);
67+
idx = largest;
68+
}
69+
}
70+
71+
/*
 * cpudl_change_key - change the deadline stored in a heap slot
 * @cp: the cpudl max-heap context
 * @idx: index of the heap slot to update
 * @new_dl: the new deadline for that slot
 *
 * If @new_dl is earlier than the stored deadline the slot is sifted
 * down; otherwise it is sifted up towards the root.
 *
 * File-local helper: it is not declared in cpudeadline.h, hence static.
 */
static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
{
	/*
	 * @idx is a heap position, not a CPU number: valid positions are
	 * 0 .. cp->size - 1.
	 */
	WARN_ON(idx == IDX_INVALID || idx >= cp->size);

	if (dl_time_before(new_dl, cp->elements[idx].dl)) {
		/* Key decreased: the slot may now be below its children. */
		cp->elements[idx].dl = new_dl;
		cpudl_heapify(cp, idx);
	} else {
		/* Key did not decrease: the slot may now exceed its parent. */
		cp->elements[idx].dl = new_dl;
		while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
					cp->elements[idx].dl)) {
			cpudl_exchange(cp, idx, parent(idx));
			idx = parent(idx);
		}
	}
}
87+
88+
static inline int cpudl_maximum(struct cpudl *cp)
89+
{
90+
return cp->elements[0].cpu;
91+
}
92+
93+
/*
94+
* cpudl_find - find the best (later-dl) CPU in the system
95+
* @cp: the cpudl max-heap context
96+
* @p: the task
97+
* @later_mask: a mask to fill in with the selected CPUs (or NULL)
98+
*
99+
* Returns: int - best CPU (heap maximum if suitable)
100+
*/
101+
int cpudl_find(struct cpudl *cp, struct task_struct *p,
102+
struct cpumask *later_mask)
103+
{
104+
int best_cpu = -1;
105+
const struct sched_dl_entity *dl_se = &p->dl;
106+
107+
if (later_mask && cpumask_and(later_mask, cp->free_cpus,
108+
&p->cpus_allowed) && cpumask_and(later_mask,
109+
later_mask, cpu_active_mask)) {
110+
best_cpu = cpumask_any(later_mask);
111+
goto out;
112+
} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
113+
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
114+
best_cpu = cpudl_maximum(cp);
115+
if (later_mask)
116+
cpumask_set_cpu(best_cpu, later_mask);
117+
}
118+
119+
out:
120+
WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1);
121+
122+
return best_cpu;
123+
}
124+
125+
/*
126+
* cpudl_set - update the cpudl max-heap
127+
* @cp: the cpudl max-heap context
128+
* @cpu: the target cpu
129+
* @dl: the new earliest deadline for this cpu
130+
*
131+
* Notes: assumes cpu_rq(cpu)->lock is locked
132+
*
133+
* Returns: (void)
134+
*/
135+
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
136+
{
137+
int old_idx, new_cpu;
138+
unsigned long flags;
139+
140+
WARN_ON(cpu > num_present_cpus());
141+
142+
raw_spin_lock_irqsave(&cp->lock, flags);
143+
old_idx = cp->cpu_to_idx[cpu];
144+
if (!is_valid) {
145+
/* remove item */
146+
if (old_idx == IDX_INVALID) {
147+
/*
148+
* Nothing to remove if old_idx was invalid.
149+
* This could happen if a rq_offline_dl is
150+
* called for a CPU without -dl tasks running.
151+
*/
152+
goto out;
153+
}
154+
new_cpu = cp->elements[cp->size - 1].cpu;
155+
cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
156+
cp->elements[old_idx].cpu = new_cpu;
157+
cp->size--;
158+
cp->cpu_to_idx[new_cpu] = old_idx;
159+
cp->cpu_to_idx[cpu] = IDX_INVALID;
160+
while (old_idx > 0 && dl_time_before(
161+
cp->elements[parent(old_idx)].dl,
162+
cp->elements[old_idx].dl)) {
163+
cpudl_exchange(cp, old_idx, parent(old_idx));
164+
old_idx = parent(old_idx);
165+
}
166+
cpumask_set_cpu(cpu, cp->free_cpus);
167+
cpudl_heapify(cp, old_idx);
168+
169+
goto out;
170+
}
171+
172+
if (old_idx == IDX_INVALID) {
173+
cp->size++;
174+
cp->elements[cp->size - 1].dl = 0;
175+
cp->elements[cp->size - 1].cpu = cpu;
176+
cp->cpu_to_idx[cpu] = cp->size - 1;
177+
cpudl_change_key(cp, cp->size - 1, dl);
178+
cpumask_clear_cpu(cpu, cp->free_cpus);
179+
} else {
180+
cpudl_change_key(cp, old_idx, dl);
181+
}
182+
183+
out:
184+
raw_spin_unlock_irqrestore(&cp->lock, flags);
185+
}
186+
187+
/*
188+
* cpudl_init - initialize the cpudl structure
189+
* @cp: the cpudl max-heap context
190+
*/
191+
int cpudl_init(struct cpudl *cp)
192+
{
193+
int i;
194+
195+
memset(cp, 0, sizeof(*cp));
196+
raw_spin_lock_init(&cp->lock);
197+
cp->size = 0;
198+
for (i = 0; i < NR_CPUS; i++)
199+
cp->cpu_to_idx[i] = IDX_INVALID;
200+
if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
201+
return -ENOMEM;
202+
cpumask_setall(cp->free_cpus);
203+
204+
return 0;
205+
}
206+
207+
/*
208+
* cpudl_cleanup - clean up the cpudl structure
209+
* @cp: the cpudl max-heap context
210+
*/
211+
void cpudl_cleanup(struct cpudl *cp)
212+
{
213+
/*
214+
* nothing to do for the moment
215+
*/
216+
}

kernel/sched/cpudeadline.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#ifndef _LINUX_CPUDL_H
2+
#define _LINUX_CPUDL_H
3+
4+
#include <linux/sched.h>
5+
6+
/* cpu_to_idx[] value for a CPU that has no slot in the heap. */
#define IDX_INVALID	(-1)
7+
8+
struct array_item {
9+
u64 dl;
10+
int cpu;
11+
};
12+
13+
struct cpudl {
14+
raw_spinlock_t lock;
15+
int size;
16+
int cpu_to_idx[NR_CPUS];
17+
struct array_item elements[NR_CPUS];
18+
cpumask_var_t free_cpus;
19+
};
20+
21+
22+
#ifdef CONFIG_SMP
23+
int cpudl_find(struct cpudl *cp, struct task_struct *p,
24+
struct cpumask *later_mask);
25+
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
26+
int cpudl_init(struct cpudl *cp);
27+
void cpudl_cleanup(struct cpudl *cp);
28+
#else
29+
#define cpudl_set(cp, cpu, dl) do { } while (0)
30+
#define cpudl_init() do { } while (0)
31+
#endif /* CONFIG_SMP */
32+
33+
#endif /* _LINUX_CPUDL_H */

0 commit comments

Comments
 (0)