
Commit 4203ff4

Prakash Sangappa authored and jfvogel committed
Sched: Scheduler time slice extension
Add support for a thread to request extending its execution time slice on the CPU. The extra CPU time granted helps the thread finish its critical section and drop any locks without being preempted. The thread requests the extension by setting a bit in the restartable sequences (rseq) structure it has registered with the kernel. When the kernel sees the bit set, it grants a 50us extension on the CPU. With the help of a timer, the kernel force-preempts the thread if it is still running when the 50us extension expires. The thread should yield the CPU after completing its critical section.

Orabug: 37583007

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Prakash Sangappa <[email protected]>
Reviewed-by: Chris Hyser <[email protected]>
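For context, here is a minimal userspace sketch of the protocol this commit implements. It is not part of the commit: it assumes a glibc (2.35 or later) that registers struct rseq per thread and exports __rseq_offset, plus a compiler providing __builtin_thread_pointer(); thread_rseq() and update_shared_state() are illustrative names, and a production version would use volatile/atomic accesses on the flags word. The grant-detection logic mirrors rseq_delay_resched() in the kernel/rseq.c hunk below, which clears the flag when it grants the extension:

#include <sched.h>          /* sched_yield() */
#include <stddef.h>         /* ptrdiff_t */
#include <linux/rseq.h>     /* struct rseq, RSEQ_CS_FLAG_DELAY_RESCHED */

extern ptrdiff_t __rseq_offset;   /* glibc 2.35+: rseq area offset from TP */

static inline struct rseq *thread_rseq(void)
{
        return (struct rseq *)((char *)__builtin_thread_pointer() +
                               __rseq_offset);
}

static void update_shared_state(void)
{
        struct rseq *rs = thread_rseq();

        /* Request a time slice extension for the critical section. */
        rs->flags |= RSEQ_CS_FLAG_DELAY_RESCHED;

        /* ... take lock, run the short critical section, drop lock ... */

        if (rs->flags & RSEQ_CS_FLAG_DELAY_RESCHED) {
                /* Bit still set: the kernel never tried to preempt us;
                 * just withdraw the request. */
                rs->flags &= ~RSEQ_CS_FLAG_DELAY_RESCHED;
        } else {
                /* The kernel cleared the bit when granting the 50us
                 * extension; locks are dropped, so give the CPU back. */
                sched_yield();
        }
}

If the bit is still set on exit from the critical section, no preemption was attempted and there is nothing to pay back; if it is gone, the thread consumed an extension and hands the CPU back via the sched_yield() fast path added in kernel/sched/syscalls.c below.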
1 parent d10b7b0

File tree: 7 files changed, +121 −8 lines

include/linux/entry-common.h

Lines changed: 8 additions & 3 deletions

@@ -302,7 +302,8 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
  * exit_to_user_mode_loop - do any pending work before leaving to user space
  */
 unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
-                                     unsigned long ti_work);
+                                     unsigned long ti_work,
+                                     bool irq);
 
 /**
  * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
@@ -314,7 +315,8 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
  *    EXIT_TO_USER_MODE_WORK are set
  * 4) check that interrupts are still disabled
  */
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs,
+                                                      bool irq)
 {
         unsigned long ti_work;
 
@@ -325,7 +327,10 @@ static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
 
         ti_work = read_thread_flags();
         if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
-                ti_work = exit_to_user_mode_loop(regs, ti_work);
+                ti_work = exit_to_user_mode_loop(regs, ti_work, irq);
+
+        if (irq)
+                rseq_delay_resched_fini();
 
         arch_exit_to_user_mode_prepare(regs, ti_work);
 

include/linux/sched.h

Lines changed: 18 additions & 0 deletions

@@ -327,6 +327,7 @@ extern int __must_check io_schedule_prepare(void);
 extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
+extern void hrtick_local_start(u64 delay);
 
 /**
  * struct prev_cputime - snapshot of system and user cputime
@@ -933,6 +934,9 @@ struct task_struct {
         struct plist_node               pushable_tasks;
         struct rb_node                  pushable_dl_tasks;
 #endif
+#ifdef CONFIG_RSEQ
+        unsigned                        rseq_sched_delay:1;
+#endif
 
         struct mm_struct                *mm;
         struct mm_struct                *active_mm;
@@ -2208,6 +2212,20 @@ static inline bool owner_on_cpu(struct task_struct *owner)
 unsigned long sched_cpu_util(int cpu);
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_RSEQ
+
+extern bool rseq_delay_resched(void);
+extern void rseq_delay_resched_fini(void);
+extern void rseq_delay_resched_tick(void);
+
+#else
+
+static inline bool rseq_delay_resched(void) { return false; }
+static inline void rseq_delay_resched_fini(void) { }
+static inline void rseq_delay_resched_tick(void) { }
+
+#endif
+
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);

include/uapi/linux/rseq.h

Lines changed: 5 additions & 0 deletions

@@ -26,6 +26,7 @@ enum rseq_cs_flags_bit {
         RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT  = 0,
         RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT   = 1,
         RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT  = 2,
+        RSEQ_CS_FLAG_DELAY_RESCHED_BIT          = 3,
 };
 
 enum rseq_cs_flags {
@@ -35,6 +36,8 @@ enum rseq_cs_flags {
                 (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
         RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
                 (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+        RSEQ_CS_FLAG_DELAY_RESCHED =
+                (1U << RSEQ_CS_FLAG_DELAY_RESCHED_BIT),
 };
 
 /*
@@ -128,6 +131,8 @@ struct rseq {
          * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
          *   Inhibit instruction sequence block restart on migration for
          *   this thread.
+         * - RSEQ_CS_DELAY_RESCHED
+         *   Try delay resched...
          */
         __u32 flags;

kernel/entry/common.c

Lines changed: 10 additions & 5 deletions

@@ -89,7 +89,8 @@ void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
  * @ti_work:    TIF work flags as read by the caller
  */
 __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
-                                                     unsigned long ti_work)
+                                                     unsigned long ti_work,
+                                                     bool irq)
 {
         /*
          * Before returning to user space ensure that all pending work
@@ -99,8 +100,12 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 
                 local_irq_enable_exit_to_user(ti_work);
 
-                if (ti_work & _TIF_NEED_RESCHED)
-                        schedule();
+                if (ti_work & _TIF_NEED_RESCHED) {
+                        if (irq && rseq_delay_resched())
+                                clear_tsk_need_resched(current);
+                        else
+                                schedule();
+                }
 
                 if (ti_work & _TIF_UPROBE)
                         uprobe_notify_resume(regs);
@@ -208,7 +213,7 @@ static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
 {
         syscall_exit_to_user_mode_prepare(regs);
         local_irq_disable_exit_to_user();
-        exit_to_user_mode_prepare(regs);
+        exit_to_user_mode_prepare(regs, false);
 }
 
 void syscall_exit_to_user_mode_work(struct pt_regs *regs)
@@ -232,7 +237,7 @@ noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
 noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
 {
         instrumentation_begin();
-        exit_to_user_mode_prepare(regs);
+        exit_to_user_mode_prepare(regs, true);
         instrumentation_end();
         exit_to_user_mode();
 }

kernel/rseq.c

Lines changed: 56 additions & 0 deletions

@@ -339,6 +339,62 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
                 force_sigsegv(sig);
 }
 
+bool rseq_delay_resched(void)
+{
+        struct task_struct *t = current;
+        u32 flags;
+
+        if (!IS_ENABLED(CONFIG_SCHED_HRTICK))
+                return false;
+
+        if (!t->rseq)
+                return false;
+
+        if (t->rseq_sched_delay)
+                return false;
+
+        if (copy_from_user_nofault(&flags, &t->rseq->flags, sizeof(flags)))
+                return false;
+
+        if (!(flags & RSEQ_CS_FLAG_DELAY_RESCHED))
+                return false;
+
+        flags &= ~RSEQ_CS_FLAG_DELAY_RESCHED;
+        if (copy_to_user_nofault(&t->rseq->flags, &flags, sizeof(flags)))
+                return false;
+
+        t->rseq_sched_delay = 1;
+
+        return true;
+}
+
+void rseq_delay_resched_fini(void)
+{
+#ifdef CONFIG_SCHED_HRTICK
+        extern void hrtick_local_start(u64 delay);
+        struct task_struct *t = current;
+        /*
+         * IRQs off, guaranteed to return to userspace, start timer on this
+         * CPU to limit the resched-overdraft.
+         *
+         * If your critical section is longer than 50 us you get to keep the
+         * pieces.
+         */
+        if (t->rseq_sched_delay)
+                hrtick_local_start(50 * NSEC_PER_USEC);
+#endif
+}
+
+void rseq_delay_resched_tick(void)
+{
+#ifdef CONFIG_SCHED_HRTICK
+        struct task_struct *t = current;
+
+        if (t->rseq_sched_delay)
+                set_tsk_need_resched(t);
+#endif
+}
+
 #ifdef CONFIG_DEBUG_RSEQ
 
 /*
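Two details of this hunk are worth noting. The flag is read and cleared with the _nofault user-copy helpers because rseq_delay_resched() runs on the IRQ-exit path where faulting is not allowed; if the rseq page is not resident, the request is simply dropped and the thread is scheduled normally. And the rseq_sched_delay bit in task_struct caps the grant at one 50us extension at a time; __schedule() (see kernel/sched/core.c below) clears it on every context switch.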

kernel/sched/core.c

Lines changed: 16 additions & 0 deletions

@@ -817,6 +817,7 @@ void update_rq_clock(struct rq *rq)
 
 static void hrtick_clear(struct rq *rq)
 {
+        rseq_delay_resched_tick();
         if (hrtimer_active(&rq->hrtick_timer))
                 hrtimer_cancel(&rq->hrtick_timer);
 }
@@ -832,6 +833,8 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 
         WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
+        rseq_delay_resched_tick();
+
         rq_lock(rq, &rf);
         update_rq_clock(rq);
         rq->curr->sched_class->task_tick(rq, rq->curr, 1);
@@ -905,6 +908,16 @@ void hrtick_start(struct rq *rq, u64 delay)
 
 #endif /* CONFIG_SMP */
 
+void hrtick_local_start(u64 delay)
+{
+        struct rq *rq = this_rq();
+        struct rq_flags rf;
+
+        rq_lock(rq, &rf);
+        hrtick_start(rq, delay);
+        rq_unlock(rq, &rf);
+}
+
 static void hrtick_rq_init(struct rq *rq)
 {
 #ifdef CONFIG_SMP
@@ -6663,6 +6676,9 @@ static void __sched notrace __schedule(int sched_mode)
 picked:
         clear_tsk_need_resched(prev);
         clear_preempt_need_resched();
+#ifdef CONFIG_RSEQ
+        prev->rseq_sched_delay = 0;
+#endif
 #ifdef CONFIG_SCHED_DEBUG
         rq->last_seen_need_resched_ns = 0;
 #endif

kernel/sched/syscalls.c

Lines changed: 8 additions & 0 deletions

@@ -1419,6 +1419,14 @@ static void do_sched_yield(void)
  */
 SYSCALL_DEFINE0(sched_yield)
 {
+
+#ifdef CONFIG_RSEQ
+        if (current->rseq_sched_delay) {
+                schedule();
+                return 0;
+        }
+#endif
+
         do_sched_yield();
         return 0;
 }
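This sched_yield() fast path completes the handshake: a thread that consumed an extension calls sched_yield() after dropping its locks, which goes straight to schedule() and returns the CPU before the 50us timer fires, bypassing do_sched_yield()'s usual requeue of the task.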
