Skip to content

Commit 740fcdc

Browse files
pierregondois authored and rafaeljw committed
cpufreq: CPPC: Register EM based on efficiency class information
Performance states and energy consumption values are not advertised in ACPI. In the GicC structure of the MADT table, the "Processor Power Efficiency Class field" (called efficiency class from now on) allows describing the relative energy efficiency of CPUs. To leverage the EM and EAS, the CPPC driver creates a set of artificial performance states and registers them in the Energy Model (EM), such that: - Every 20 capacity units, a performance state is created. - The energy cost of each performance state gradually increases. No power value is generated as only the cost is used in the EM. During task placement, a task can raise the frequency of its whole pd. This can make EAS place a task on a pd with CPUs that are individually less energy efficient. As cost values are artificial, and to place tasks on CPUs with the lower efficiency class, a gap in cost values is generated for adjacent efficiency classes. E.g.: - efficiency class = 0, capacity is in [0-1024], so cost values are in [0: 51] (one performance state every 20 capacity units) - efficiency class = 1, capacity is in [0-1024], cost values are in [1*gap+0: 1*gap+51]. The value of the cost gap is chosen to absorb the energy of 4 CPUs at their maximum capacity. This means that between: 1- a pd of 4 CPUs, each of them being used at almost their full capacity. Their efficiency class is N. 2- a CPU using almost none of its capacity. Its efficiency class is N+1. EAS will choose the first option. This patch also populates the (struct cpufreq_driver).register_em callback if valid efficiency_class ACPI values are provided. Signed-off-by: Pierre Gondois <[email protected]> Acked-by: Viresh Kumar <[email protected]> Signed-off-by: Rafael J. Wysocki <[email protected]>
1 parent d3c3db4 commit 740fcdc

File tree

1 file changed

+144
-0
lines changed

1 file changed

+144
-0
lines changed

drivers/cpufreq/cppc_cpufreq.c

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,134 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
421421
}
422422

423423
static DEFINE_PER_CPU(unsigned int, efficiency_class);
424+
static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
425+
426+
/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
427+
#define CPPC_EM_CAP_STEP (20)
428+
/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
429+
#define CPPC_EM_COST_STEP (1)
430+
/* Add a cost gap corresponding to the energy of 4 CPUs. */
431+
#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
432+
/ CPPC_EM_CAP_STEP)
433+
434+
static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
435+
{
436+
struct cppc_perf_caps *perf_caps;
437+
unsigned int min_cap, max_cap;
438+
struct cppc_cpudata *cpu_data;
439+
int cpu = policy->cpu;
440+
441+
cpu_data = policy->driver_data;
442+
perf_caps = &cpu_data->perf_caps;
443+
max_cap = arch_scale_cpu_capacity(cpu);
444+
min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
445+
if ((min_cap == 0) || (max_cap < min_cap))
446+
return 0;
447+
return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
448+
}
449+
450+
/*
451+
* The cost is defined as:
452+
* cost = power * max_frequency / frequency
453+
*/
454+
static inline unsigned long compute_cost(int cpu, int step)
455+
{
456+
return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
457+
step * CPPC_EM_COST_STEP;
458+
}
459+
460+
static int cppc_get_cpu_power(struct device *cpu_dev,
461+
unsigned long *power, unsigned long *KHz)
462+
{
463+
unsigned long perf_step, perf_prev, perf, perf_check;
464+
unsigned int min_step, max_step, step, step_check;
465+
unsigned long prev_freq = *KHz;
466+
unsigned int min_cap, max_cap;
467+
struct cpufreq_policy *policy;
468+
469+
struct cppc_perf_caps *perf_caps;
470+
struct cppc_cpudata *cpu_data;
471+
472+
policy = cpufreq_cpu_get_raw(cpu_dev->id);
473+
cpu_data = policy->driver_data;
474+
perf_caps = &cpu_data->perf_caps;
475+
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
476+
min_cap = div_u64(max_cap * perf_caps->lowest_perf,
477+
perf_caps->highest_perf);
478+
479+
perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
480+
min_step = min_cap / CPPC_EM_CAP_STEP;
481+
max_step = max_cap / CPPC_EM_CAP_STEP;
482+
483+
perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
484+
step = perf_prev / perf_step;
485+
486+
if (step > max_step)
487+
return -EINVAL;
488+
489+
if (min_step == max_step) {
490+
step = max_step;
491+
perf = perf_caps->highest_perf;
492+
} else if (step < min_step) {
493+
step = min_step;
494+
perf = perf_caps->lowest_perf;
495+
} else {
496+
step++;
497+
if (step == max_step)
498+
perf = perf_caps->highest_perf;
499+
else
500+
perf = step * perf_step;
501+
}
502+
503+
*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
504+
perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
505+
step_check = perf_check / perf_step;
506+
507+
/*
508+
* To avoid bad integer approximation, check that new frequency value
509+
* increased and that the new frequency will be converted to the
510+
* desired step value.
511+
*/
512+
while ((*KHz == prev_freq) || (step_check != step)) {
513+
perf++;
514+
*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
515+
perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
516+
step_check = perf_check / perf_step;
517+
}
518+
519+
/*
520+
* With an artificial EM, only the cost value is used. Still the power
521+
* is populated such as 0 < power < EM_MAX_POWER. This allows to add
522+
* more sense to the artificial performance states.
523+
*/
524+
*power = compute_cost(cpu_dev->id, step);
525+
526+
return 0;
527+
}
528+
529+
static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
530+
unsigned long *cost)
531+
{
532+
unsigned long perf_step, perf_prev;
533+
struct cppc_perf_caps *perf_caps;
534+
struct cpufreq_policy *policy;
535+
struct cppc_cpudata *cpu_data;
536+
unsigned int max_cap;
537+
int step;
538+
539+
policy = cpufreq_cpu_get_raw(cpu_dev->id);
540+
cpu_data = policy->driver_data;
541+
perf_caps = &cpu_data->perf_caps;
542+
max_cap = arch_scale_cpu_capacity(cpu_dev->id);
543+
544+
perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
545+
perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
546+
step = perf_prev / perf_step;
547+
548+
*cost = compute_cost(cpu_dev->id, step);
549+
550+
return 0;
551+
}
424552

425553
static int populate_efficiency_class(void)
426554
{
@@ -453,10 +581,23 @@ static int populate_efficiency_class(void)
453581
}
454582
index++;
455583
}
584+
cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
456585

457586
return 0;
458587
}
459588

589+
static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
590+
{
591+
struct cppc_cpudata *cpu_data;
592+
struct em_data_callback em_cb =
593+
EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
594+
595+
cpu_data = policy->driver_data;
596+
em_dev_register_perf_domain(get_cpu_device(policy->cpu),
597+
get_perf_level_count(policy), &em_cb,
598+
cpu_data->shared_cpu_map, 0);
599+
}
600+
460601
#else
461602

462603
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
@@ -467,6 +608,9 @@ static int populate_efficiency_class(void)
467608
{
468609
return 0;
469610
}
611+
/* No-op variant used in the #else branch: no Energy Model is registered. */
static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
{
}
470614
#endif
471615

472616

0 commit comments

Comments
 (0)