Skip to content

Commit 5484e31

Browse files
committed
cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases
Because the cost of calling tick_nohz_get_sleep_length() may increase in the future, reorder the code in menu_select() so it first uses the statistics to determine the expected idle duration. If that value is higher than RESIDENCY_THRESHOLD_NS, tick_nohz_get_sleep_length() will be called to obtain the time till the closest timer and refine the idle duration prediction if necessary.

This causes the governor to always take the full overhead of get_typical_interval() with the assumption that the cost will be amortized by skipping the tick_nohz_get_sleep_length() call in the cases when the predicted idle duration is relatively very small.

Signed-off-by: Rafael J. Wysocki <[email protected]>
Tested-by: Doug Smythies <[email protected]>
1 parent 2662342 commit 5484e31

File tree

3 files changed

+54
-34
lines changed

3 files changed

+54
-34
lines changed

drivers/cpuidle/governors/gov.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
/* Common definitions for cpuidle governors. */
4+
5+
#ifndef __CPUIDLE_GOVERNOR_H
6+
#define __CPUIDLE_GOVERNOR_H
7+
8+
/*
9+
* Idle state target residency threshold used for deciding whether or not to
10+
* check the time till the closest expected timer event.
11+
*/
12+
#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
13+
14+
#endif /* __CPUIDLE_GOVERNOR_H */

drivers/cpuidle/governors/menu.c

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include <linux/sched/stat.h>
2020
#include <linux/math64.h>
2121

22+
#include "gov.h"
23+
2224
#define BUCKETS 12
2325
#define INTERVAL_SHIFT 3
2426
#define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
166168
* of points is below a threshold. If it is... then use the
167169
* average of these 8 points as the estimated value.
168170
*/
169-
static unsigned int get_typical_interval(struct menu_device *data,
170-
unsigned int predicted_us)
171+
static unsigned int get_typical_interval(struct menu_device *data)
171172
{
172173
int i, divisor;
173174
unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ static unsigned int get_typical_interval(struct menu_device *data,
195196
}
196197
}
197198

198-
/*
199-
* If the result of the computation is going to be discarded anyway,
200-
* avoid the computation altogether.
201-
*/
202-
if (min >= predicted_us)
199+
if (!max)
203200
return UINT_MAX;
204201

205202
if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
267264
{
268265
struct menu_device *data = this_cpu_ptr(&menu_devices);
269266
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
270-
unsigned int predicted_us;
271267
u64 predicted_ns;
272268
u64 interactivity_req;
273269
unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
279275
data->needs_update = 0;
280276
}
281277

282-
/* determine the expected residency time, round up */
283-
delta = tick_nohz_get_sleep_length(&delta_tick);
284-
if (unlikely(delta < 0)) {
285-
delta = 0;
286-
delta_tick = 0;
287-
}
288-
data->next_timer_ns = delta;
289-
290278
nr_iowaiters = nr_iowait_cpu(dev->cpu);
291-
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
279+
280+
/* Find the shortest expected idle interval. */
281+
predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
282+
if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
283+
unsigned int timer_us;
284+
285+
/* Determine the time till the closest timer. */
286+
delta = tick_nohz_get_sleep_length(&delta_tick);
287+
if (unlikely(delta < 0)) {
288+
delta = 0;
289+
delta_tick = 0;
290+
}
291+
292+
data->next_timer_ns = delta;
293+
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
294+
295+
/* Round up the result for half microseconds. */
296+
timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
297+
data->next_timer_ns *
298+
data->correction_factor[data->bucket],
299+
RESOLUTION * DECAY * NSEC_PER_USEC);
300+
/* Use the lowest expected idle interval to pick the idle state. */
301+
predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
302+
} else {
303+
/*
304+
* Because the next timer event is not going to be determined
305+
* in this case, assume that without the tick the closest timer
306+
* will be in distant future and that the closest tick will occur
307+
* after 1/2 of the tick period.
308+
*/
309+
data->next_timer_ns = KTIME_MAX;
310+
delta_tick = TICK_NSEC / 2;
311+
data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
312+
}
292313

293314
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
294315
((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
303324
return 0;
304325
}
305326

306-
/* Round up the result for half microseconds. */
307-
predicted_us = div_u64(data->next_timer_ns *
308-
data->correction_factor[data->bucket] +
309-
(RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
310-
RESOLUTION * DECAY * NSEC_PER_USEC);
311-
/* Use the lowest expected idle interval to pick the idle state. */
312-
predicted_ns = (u64)min(predicted_us,
313-
get_typical_interval(data, predicted_us)) *
314-
NSEC_PER_USEC;
315-
316327
if (tick_nohz_tick_stopped()) {
317328
/*
318329
* If the tick is already stopped, the cost of possible short

drivers/cpuidle/governors/teo.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@
140140
#include <linux/sched/topology.h>
141141
#include <linux/tick.h>
142142

143+
#include "gov.h"
144+
143145
/*
144146
* The number of bits to shift the CPU's capacity by in order to determine
145147
* the utilized threshold.
@@ -152,7 +154,6 @@
152154
*/
153155
#define UTIL_THRESHOLD_SHIFT 6
154156

155-
156157
/*
157158
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
158159
* is used for decreasing metrics on a regular basis.
@@ -166,12 +167,6 @@
166167
*/
167168
#define NR_RECENT 9
168169

169-
/*
170-
* Idle state target residency threshold used for deciding whether or not to
171-
* check the time till the closest expected timer event.
172-
*/
173-
#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
174-
175170
/**
176171
* struct teo_bin - Metrics used by the TEO cpuidle governor.
177172
* @intercepts: The "intercepts" metric.

0 commit comments

Comments
 (0)