Skip to content

Commit ee2cc42

Browse files
committed
cpufreq: Add special-purpose fast-switching callback for drivers
First off, some cpufreq drivers (eg. intel_pstate) can pass hints beyond the current target frequency to the hardware and there are no provisions for doing that in the cpufreq framework. In particular, today the driver has to assume that it should not allow the frequency to fall below the one requested by the governor (or the required capacity may not be provided) which may not be the case and which may lead to excessive energy usage in some scenarios. Second, the hints passed by these drivers to the hardware need not be in terms of the frequency, so representing the utilization numbers coming from the scheduler as frequency before passing them to those drivers is not really useful. Address the two points above by adding a special-purpose replacement for the ->fast_switch callback, called ->adjust_perf, allowing the governor to pass abstract performance level (rather than frequency) values for the minimum (required) and target (desired) performance along with the CPU capacity to compare them to. Also update the schedutil governor to use the new callback instead of ->fast_switch if present and if the utilization mertics are frequency-invariant (that is requisite for the direct mapping between the utilization and the CPU performance levels to be a reasonable approximation). Signed-off-by: Rafael J. Wysocki <[email protected]> Acked-by: Viresh Kumar <[email protected]>
1 parent ca6827d commit ee2cc42

File tree

4 files changed

+117
-10
lines changed

4 files changed

+117
-10
lines changed

drivers/cpufreq/cpufreq.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2097,6 +2097,46 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
20972097
}
20982098
EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
20992099

2100+
/**
 * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
 * @cpu: Target CPU.
 * @min_perf: Minimum (required) performance level (units of @capacity).
 * @target_perf: Target (desired) performance level (units of @capacity).
 * @capacity: Capacity of the target CPU.
 *
 * Carry out a fast performance level switch of @cpu without sleeping.
 *
 * The driver's ->adjust_perf() callback invoked by this function must be
 * suitable for being called from within RCU-sched read-side critical sections
 * and it is expected to select a suitable performance level equal to or above
 * @min_perf and preferably equal to or below @target_perf.
 *
 * This function must not be called if policy->fast_switch_enabled is unset.
 *
 * Governors calling this function must guarantee that it will never be invoked
 * twice in parallel for the same CPU and that it will never be called in
 * parallel with either ->target() or ->target_index() or ->fast_switch() for
 * the same CPU.
 */
void cpufreq_driver_adjust_perf(unsigned int cpu,
				unsigned long min_perf,
				unsigned long target_perf,
				unsigned long capacity)
{
	/*
	 * No presence check on ->adjust_perf here: callers are expected to
	 * gate on cpufreq_driver_has_adjust_perf() first, keeping this path
	 * as lean as possible for the scheduler fast path.
	 */
	cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
}
2128+
2129+
/**
2130+
* cpufreq_driver_has_adjust_perf - Check "direct fast switch" callback.
2131+
*
2132+
* Return 'true' if the ->adjust_perf callback is present for the
2133+
* current driver or 'false' otherwise.
2134+
*/
2135+
bool cpufreq_driver_has_adjust_perf(void)
2136+
{
2137+
return !!cpufreq_driver->adjust_perf;
2138+
}
2139+
21002140
/* Must set freqs->new to intermediate frequency */
21012141
static int __target_intermediate(struct cpufreq_policy *policy,
21022142
struct cpufreq_freqs *freqs, int index)

include/linux/cpufreq.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,15 @@ struct cpufreq_driver {
320320
unsigned int index);
321321
unsigned int (*fast_switch)(struct cpufreq_policy *policy,
322322
unsigned int target_freq);
323+
/*
324+
* ->fast_switch() replacement for drivers that use an internal
325+
* representation of performance levels and can pass hints other than
326+
* the target performance level to the hardware.
327+
*/
328+
void (*adjust_perf)(unsigned int cpu,
329+
unsigned long min_perf,
330+
unsigned long target_perf,
331+
unsigned long capacity);
323332

324333
/*
325334
* Caches and returns the lowest driver-supported frequency greater than
@@ -588,6 +597,11 @@ struct cpufreq_governor {
588597
/* Pass a target to the cpufreq driver */
589598
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
590599
unsigned int target_freq);
600+
void cpufreq_driver_adjust_perf(unsigned int cpu,
601+
unsigned long min_perf,
602+
unsigned long target_perf,
603+
unsigned long capacity);
604+
bool cpufreq_driver_has_adjust_perf(void);
591605
int cpufreq_driver_target(struct cpufreq_policy *policy,
592606
unsigned int target_freq,
593607
unsigned int relation);

include/linux/sched/cpufreq.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util,
2828
{
2929
return (freq + (freq >> 2)) * util / cap;
3030
}
31+
32+
/*
 * Map a utilization value to an abstract performance level, applying the
 * same 25% headroom that map_util_freq() applies on the frequency path
 * (util / 4 is identical to util >> 2 for unsigned operands).
 */
static inline unsigned long map_util_perf(unsigned long util)
{
	return util + util / 4;
}
3136
#endif /* CONFIG_CPU_FREQ */
3237

3338
#endif /* _LINUX_SCHED_CPUFREQ_H */

kernel/sched/cpufreq_schedutil.c

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -432,25 +432,36 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_p
432432
sg_policy->limits_changed = true;
433433
}
434434

435-
static void sugov_update_single(struct update_util_data *hook, u64 time,
436-
unsigned int flags)
435+
static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
436+
u64 time, unsigned int flags)
437437
{
438-
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
439438
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
440-
unsigned int cached_freq = sg_policy->cached_raw_freq;
441-
unsigned int next_f;
442439

443440
sugov_iowait_boost(sg_cpu, time, flags);
444441
sg_cpu->last_update = time;
445442

446443
ignore_dl_rate_limit(sg_cpu, sg_policy);
447444

448445
if (!sugov_should_update_freq(sg_policy, time))
449-
return;
446+
return false;
450447

451448
sugov_get_util(sg_cpu);
452449
sugov_iowait_apply(sg_cpu, time);
453450

451+
return true;
452+
}
453+
454+
static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
455+
unsigned int flags)
456+
{
457+
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
458+
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
459+
unsigned int cached_freq = sg_policy->cached_raw_freq;
460+
unsigned int next_f;
461+
462+
if (!sugov_update_single_common(sg_cpu, time, flags))
463+
return;
464+
454465
next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
455466
/*
456467
* Do not reduce the frequency if the CPU has not been idle
@@ -477,6 +488,38 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
477488
}
478489
}
479490

491+
/*
 * Scheduler update hook for a single-CPU policy whose driver implements
 * ->adjust_perf(): pass abstract performance levels (derived directly from
 * the utilization) to the driver instead of computing a target frequency.
 */
static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	/* Snapshot util before sugov_update_single_common() refreshes it. */
	unsigned long prev_util = sg_cpu->util;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	/* Returns false when rate-limited; common path updates sg_cpu->util. */
	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	/*
	 * Do not reduce the target performance level if the CPU has not been
	 * idle recently, as the reduction is likely to be premature then.
	 */
	if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	/*
	 * Minimum performance is driven by the deadline bandwidth (bw_dl),
	 * desired performance by total utilization; both get the 25% headroom
	 * from map_util_perf() and are expressed in units of sg_cpu->max.
	 */
	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
				   map_util_perf(sg_cpu->util), sg_cpu->max);

	sg_cpu->sg_policy->last_freq_update_time = time;
}
522+
480523
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
481524
{
482525
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
@@ -815,6 +858,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
815858
static int sugov_start(struct cpufreq_policy *policy)
816859
{
817860
struct sugov_policy *sg_policy = policy->governor_data;
861+
void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
818862
unsigned int cpu;
819863

820864
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
@@ -834,13 +878,17 @@ static int sugov_start(struct cpufreq_policy *policy)
834878
sg_cpu->sg_policy = sg_policy;
835879
}
836880

881+
if (policy_is_shared(policy))
882+
uu = sugov_update_shared;
883+
else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
884+
uu = sugov_update_single_perf;
885+
else
886+
uu = sugov_update_single_freq;
887+
837888
for_each_cpu(cpu, policy->cpus) {
838889
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
839890

840-
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
841-
policy_is_shared(policy) ?
842-
sugov_update_shared :
843-
sugov_update_single);
891+
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
844892
}
845893
return 0;
846894
}

0 commit comments

Comments
 (0)