Skip to content

Commit 38f8309

Browse files
cloehle authored and rafaeljw committed
cpuidle: menu: Remove iowait influence
Remove CPU iowaiters influence on idle state selection.

Remove the menu notion of performance multiplier which increased with the number of tasks that went to iowait sleep on this CPU and haven't woken up yet.

Relying on iowait for cpuidle is problematic for a few reasons:

1. There is no guarantee that an iowaiting task will wake up on the same CPU.
2. The task being in iowait says nothing about the idle duration, we could be selecting shallower states for a long time.
3. The task being in iowait doesn't always imply a performance hit with increased latency.
4. If there is such a performance hit, the number of iowaiting tasks doesn't directly correlate.
5. The definition of iowait altogether is vague at best, it is sprinkled across kernel code.

Signed-off-by: Christian Loehle <[email protected]>
Link: https://patch.msgid.link/[email protected]
[ rjw: Minor edits in the changelog ]
Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent 9852d85 commit 38f8309

File tree

1 file changed

+9
-67
lines changed
  • drivers/cpuidle/governors

1 file changed

+9
-67
lines changed

drivers/cpuidle/governors/menu.c

Lines changed: 9 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include "gov.h"
2121

22-
#define BUCKETS 12
22+
#define BUCKETS 6
2323
#define INTERVAL_SHIFT 3
2424
#define INTERVALS (1UL << INTERVAL_SHIFT)
2525
#define RESOLUTION 1024
@@ -29,12 +29,11 @@
2929
/*
3030
* Concepts and ideas behind the menu governor
3131
*
32-
* For the menu governor, there are 3 decision factors for picking a C
32+
* For the menu governor, there are 2 decision factors for picking a C
3333
* state:
3434
* 1) Energy break even point
35-
* 2) Performance impact
36-
* 3) Latency tolerance (from pmqos infrastructure)
37-
* These three factors are treated independently.
35+
* 2) Latency tolerance (from pmqos infrastructure)
36+
* These two factors are treated independently.
3837
*
3938
* Energy break even point
4039
* -----------------------
@@ -75,30 +74,6 @@
7574
* intervals and if the stand deviation of these 8 intervals is below a
7675
* threshold value, we use the average of these intervals as prediction.
7776
*
78-
* Limiting Performance Impact
79-
* ---------------------------
80-
* C states, especially those with large exit latencies, can have a real
81-
* noticeable impact on workloads, which is not acceptable for most sysadmins,
82-
* and in addition, less performance has a power price of its own.
83-
*
84-
* As a general rule of thumb, menu assumes that the following heuristic
85-
* holds:
86-
* The busier the system, the less impact of C states is acceptable
87-
*
88-
* This rule-of-thumb is implemented using a performance-multiplier:
89-
* If the exit latency times the performance multiplier is longer than
90-
* the predicted duration, the C state is not considered a candidate
91-
* for selection due to a too high performance impact. So the higher
92-
* this multiplier is, the longer we need to be idle to pick a deep C
93-
* state, and thus the less likely a busy CPU will hit such a deep
94-
* C state.
95-
*
96-
* Currently there is only one value determining the factor:
97-
* 10 points are added for each process that is waiting for IO on this CPU.
98-
* (This value was experimentally determined.)
99-
* Utilization is no longer a factor as it was shown that it never contributed
100-
* significantly to the performance multiplier in the first place.
101-
*
10277
*/
10378

10479
struct menu_device {
@@ -112,19 +87,10 @@ struct menu_device {
11287
int interval_ptr;
11388
};
11489

115-
static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
90+
static inline int which_bucket(u64 duration_ns)
11691
{
11792
int bucket = 0;
11893

119-
/*
120-
* We keep two groups of stats; one with no
121-
* IO pending, one without.
122-
* This allows us to calculate
123-
* E(duration)|iowait
124-
*/
125-
if (nr_iowaiters)
126-
bucket = BUCKETS/2;
127-
12894
if (duration_ns < 10ULL * NSEC_PER_USEC)
12995
return bucket;
13096
if (duration_ns < 100ULL * NSEC_PER_USEC)
@@ -138,19 +104,6 @@ static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
138104
return bucket + 5;
139105
}
140106

141-
/*
142-
* Return a multiplier for the exit latency that is intended
143-
* to take performance requirements into account.
144-
* The more performance critical we estimate the system
145-
* to be, the higher this multiplier, and thus the higher
146-
* the barrier to go to an expensive C state.
147-
*/
148-
static inline int performance_multiplier(unsigned int nr_iowaiters)
149-
{
150-
/* for IO wait tasks (per cpu!) we add 10x each */
151-
return 1 + 10 * nr_iowaiters;
152-
}
153-
154107
static DEFINE_PER_CPU(struct menu_device, menu_devices);
155108

156109
static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
@@ -258,8 +211,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
258211
struct menu_device *data = this_cpu_ptr(&menu_devices);
259212
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
260213
u64 predicted_ns;
261-
u64 interactivity_req;
262-
unsigned int nr_iowaiters;
263214
ktime_t delta, delta_tick;
264215
int i, idx;
265216

@@ -268,8 +219,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
268219
data->needs_update = 0;
269220
}
270221

271-
nr_iowaiters = nr_iowait_cpu(dev->cpu);
272-
273222
/* Find the shortest expected idle interval. */
274223
predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
275224
if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
@@ -283,7 +232,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
283232
}
284233

285234
data->next_timer_ns = delta;
286-
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
235+
data->bucket = which_bucket(data->next_timer_ns);
287236

288237
/* Round up the result for half microseconds. */
289238
timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
@@ -301,7 +250,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
301250
*/
302251
data->next_timer_ns = KTIME_MAX;
303252
delta_tick = TICK_NSEC / 2;
304-
data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
253+
data->bucket = which_bucket(KTIME_MAX);
305254
}
306255

307256
if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -328,15 +277,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
328277
*/
329278
if (predicted_ns < TICK_NSEC)
330279
predicted_ns = data->next_timer_ns;
331-
} else {
332-
/*
333-
* Use the performance multiplier and the user-configurable
334-
* latency_req to determine the maximum exit latency.
335-
*/
336-
interactivity_req = div64_u64(predicted_ns,
337-
performance_multiplier(nr_iowaiters));
338-
if (latency_req > interactivity_req)
339-
latency_req = interactivity_req;
280+
} else if (latency_req > predicted_ns) {
281+
latency_req = predicted_ns;
340282
}
341283

342284
/*

0 commit comments

Comments
 (0)