Skip to content

Commit c0f4dfd

Browse files
Paul E. McKenney (paulmck)
authored and committed
rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks
Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent b11cc57 commit c0f4dfd

File tree

7 files changed

+149
-313
lines changed

7 files changed

+149
-313
lines changed

Documentation/kernel-parameters.txt

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
24902490
leaf rcu_node structure. Useful for very large
24912491
systems.
24922492

2493+
rcutree.jiffies_till_first_fqs= [KNL,BOOT]
2494+
Set delay from grace-period initialization to
2495+
first attempt to force quiescent states.
2496+
Units are jiffies, minimum value is zero,
2497+
and maximum value is HZ.
2498+
2499+
rcutree.jiffies_till_next_fqs= [KNL,BOOT]
2500+
Set delay between subsequent attempts to force
2501+
quiescent states. Units are jiffies, minimum
2502+
value is one, and maximum value is HZ.
2503+
24932504
rcutree.qhimark= [KNL,BOOT]
24942505
Set threshold of queued
24952506
RCU callbacks over which batch limiting is disabled.
@@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
25042515
rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
25052516
Set timeout for RCU CPU stall warning messages.
25062517

2507-
rcutree.jiffies_till_first_fqs= [KNL,BOOT]
2508-
Set delay from grace-period initialization to
2509-
first attempt to force quiescent states.
2510-
Units are jiffies, minimum value is zero,
2511-
and maximum value is HZ.
2518+
rcutree.rcu_idle_gp_delay= [KNL,BOOT]
2519+
Set wakeup interval for idle CPUs that have
2520+
RCU callbacks (RCU_FAST_NO_HZ=y).
25122521

2513-
rcutree.jiffies_till_next_fqs= [KNL,BOOT]
2514-
Set delay between subsequent attempts to force
2515-
quiescent states. Units are jiffies, minimum
2516-
value is one, and maximum value is HZ.
2522+
rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
2523+
Set wakeup interval for idle CPUs that have
2524+
only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
2525+
Lazy RCU callbacks are those which RCU can
2526+
prove do nothing more than free memory.
25172527

25182528
rcutorture.fqs_duration= [KNL,BOOT]
25192529
Set duration of force_quiescent_state bursts.

include/linux/rcupdate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
8080
#define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b))
8181
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
8282
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
83+
#define ulong2long(a) (*(long *)(&(a)))
8384

8485
/* Exported common interfaces */
8586

init/Kconfig

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ
582582
depends on NO_HZ && SMP
583583
default n
584584
help
585-
This option causes RCU to attempt to accelerate grace periods in
586-
order to allow CPUs to enter dynticks-idle state more quickly.
587-
On the other hand, this option increases the overhead of the
588-
dynticks-idle checking, thus degrading scheduling latency.
589-
590-
Say Y if energy efficiency is critically important, and you don't
591-
care about real-time response.
585+
This option permits CPUs to enter dynticks-idle state even if
586+
they have RCU callbacks queued, and prevents RCU from waking
587+
these CPUs up more than roughly once every four jiffies (by
588+
default; you can adjust this using the rcutree.rcu_idle_gp_delay
589+
parameter), thus improving energy efficiency. On the other
590+
hand, this option increases the duration of RCU grace periods,
591+
for example, slowing down synchronize_rcu().
592+
593+
Say Y if energy efficiency is critically important, and you
594+
don't care about increased grace-period durations.
592595

593596
Say N if you are unsure.
594597

kernel/rcutree.c

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu)
26402640
}
26412641

26422642
/*
2643-
* Check to see if any future RCU-related work will need to be done
2644-
* by the current CPU, even if none need be done immediately, returning
2645-
* 1 if so.
2643+
* Return true if the specified CPU has any callback. If all_lazy is
2644+
* non-NULL, store an indication of whether all callbacks are lazy.
2645+
* (If there are no callbacks, all of them are deemed to be lazy.)
26462646
*/
2647-
static int rcu_cpu_has_callbacks(int cpu)
2647+
static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
26482648
{
2649+
bool al = true;
2650+
bool hc = false;
2651+
struct rcu_data *rdp;
26492652
struct rcu_state *rsp;
26502653

2651-
/* RCU callbacks either ready or pending? */
2652-
for_each_rcu_flavor(rsp)
2653-
if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
2654-
return 1;
2655-
return 0;
2654+
for_each_rcu_flavor(rsp) {
2655+
rdp = per_cpu_ptr(rsp->rda, cpu);
2656+
if (rdp->qlen != rdp->qlen_lazy)
2657+
al = false;
2658+
if (rdp->nxtlist)
2659+
hc = true;
2660+
}
2661+
if (all_lazy)
2662+
*all_lazy = al;
2663+
return hc;
26562664
}
26572665

26582666
/*
@@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
28712879
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
28722880
atomic_set(&rdp->dynticks->dynticks,
28732881
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2874-
rcu_prepare_for_idle_init(cpu);
28752882
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
28762883

28772884
/* Add CPU to rcu_node bitmasks. */
@@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
29452952
*/
29462953
for_each_rcu_flavor(rsp)
29472954
rcu_cleanup_dying_cpu(rsp);
2948-
rcu_cleanup_after_idle(cpu);
29492955
break;
29502956
case CPU_DEAD:
29512957
case CPU_DEAD_FROZEN:

kernel/rcutree.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,13 @@ struct rcu_dynticks {
8888
int dynticks_nmi_nesting; /* Track NMI nesting level. */
8989
atomic_t dynticks; /* Even value for idle, else odd. */
9090
#ifdef CONFIG_RCU_FAST_NO_HZ
91-
int dyntick_drain; /* Prepare-for-idle state variable. */
92-
unsigned long dyntick_holdoff;
93-
/* No retries for the jiffy of failure. */
94-
struct timer_list idle_gp_timer;
95-
/* Wake up CPU sleeping with callbacks. */
96-
unsigned long idle_gp_timer_expires;
97-
/* When to wake up CPU (for repost). */
98-
bool idle_first_pass; /* First pass of attempt to go idle? */
91+
bool all_lazy; /* Are all CPU's CBs lazy? */
9992
unsigned long nonlazy_posted;
10093
/* # times non-lazy CBs posted to CPU. */
10194
unsigned long nonlazy_posted_snap;
10295
/* idle-period nonlazy_posted snapshot. */
96+
unsigned long last_accelerate;
97+
/* Last jiffy CBs were accelerated. */
10398
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
10499
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
105100
};
@@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
521516
struct rcu_node *rnp);
522517
#endif /* #ifdef CONFIG_RCU_BOOST */
523518
static void __cpuinit rcu_prepare_kthreads(int cpu);
524-
static void rcu_prepare_for_idle_init(int cpu);
525519
static void rcu_cleanup_after_idle(int cpu);
526520
static void rcu_prepare_for_idle(int cpu);
527521
static void rcu_idle_count_callbacks_posted(void);

0 commit comments

Comments
 (0)