Skip to content

Commit 291009f

Browse files
committed
Merge tag 'pm-5.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:

"Address a performance regression related to scale-invariance on x86 that may prevent turbo CPU frequencies from being used in certain workloads on systems using acpi-cpufreq as the CPU performance scaling driver and schedutil as the scaling governor"

* tag 'pm-5.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  - cpufreq: ACPI: Update arch scale-invariance max perf ratio if CPPC is not there
  - cpufreq: ACPI: Extend frequency tables to cover boost frequencies
2 parents a396149 + d11a1d0 commit 291009f

File tree

2 files changed

+104
-12
lines changed

2 files changed

+104
-12
lines changed

arch/x86/kernel/smpboot.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1833,6 +1833,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled)
18331833
arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
18341834
arch_turbo_freq_ratio;
18351835
}
1836+
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
18361837

18371838
static bool turbo_disabled(void)
18381839
{

drivers/cpufreq/acpi-cpufreq.c

Lines changed: 103 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include <linux/uaccess.h>
2727

2828
#include <acpi/processor.h>
29+
#include <acpi/cppc_acpi.h>
2930

3031
#include <asm/msr.h>
3132
#include <asm/processor.h>
@@ -53,6 +54,7 @@ struct acpi_cpufreq_data {
5354
unsigned int resume;
5455
unsigned int cpu_feature;
5556
unsigned int acpi_perf_cpu;
57+
unsigned int first_perf_state;
5658
cpumask_var_t freqdomain_cpus;
5759
void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
5860
u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@@ -221,10 +223,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
221223

222224
perf = to_perf_data(data);
223225

224-
cpufreq_for_each_entry(pos, policy->freq_table)
226+
cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
225227
if (msr == perf->states[pos->driver_data].status)
226228
return pos->frequency;
227-
return policy->freq_table[0].frequency;
229+
return policy->freq_table[data->first_perf_state].frequency;
228230
}
229231

230232
static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@@ -363,6 +365,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
363365
struct cpufreq_policy *policy;
364366
unsigned int freq;
365367
unsigned int cached_freq;
368+
unsigned int state;
366369

367370
pr_debug("%s (%d)\n", __func__, cpu);
368371

@@ -374,7 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
374377
if (unlikely(!data || !policy->freq_table))
375378
return 0;
376379

377-
cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
380+
state = to_perf_data(data)->state;
381+
if (state < data->first_perf_state)
382+
state = data->first_perf_state;
383+
384+
cached_freq = policy->freq_table[state].frequency;
378385
freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
379386
if (freq != cached_freq) {
380387
/*
@@ -628,16 +635,54 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
628635
}
629636
#endif
630637

638+
#ifdef CONFIG_ACPI_CPPC_LIB
639+
/*
 * get_max_boost_ratio - ratio of CPPC highest to nominal performance.
 * @cpu: CPU number passed to cppc_get_perf_caps().
 *
 * Returns (highest_perf << SCHED_CAPACITY_SHIFT) / nominal_perf, i.e. the
 * ratio scaled up by SCHED_CAPACITY_SHIFT bits so that it can be kept in an
 * integer.
 *
 * Returns 0 (which the caller treats as "maximum boost unknown") when:
 *  - acpi_pstate_strict is set,
 *  - cppc_get_perf_caps() reports an error,
 *  - either the highest or the nominal performance value is zero,
 *  - the highest performance is below the nominal performance.
 */
static u64 get_max_boost_ratio(unsigned int cpu)
640+
{
641+
struct cppc_perf_caps perf_caps;
642+
u64 highest_perf, nominal_perf;
643+
int ret;
644+
645+
/* No boost ratio is reported when acpi_pstate_strict is set. */
if (acpi_pstate_strict)
646+
return 0;
647+
648+
ret = cppc_get_perf_caps(cpu, &perf_caps);
649+
if (ret) {
650+
pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
651+
cpu, ret);
652+
return 0;
653+
}
654+
655+
highest_perf = perf_caps.highest_perf;
656+
nominal_perf = perf_caps.nominal_perf;
657+
658+
/* A zero nominal_perf would also be a division by zero below. */
if (!highest_perf || !nominal_perf) {
659+
pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
660+
return 0;
661+
}
662+
663+
/* Reject inconsistent data: the ratio would come out below 1.0. */
if (highest_perf < nominal_perf) {
664+
pr_debug("CPU%d: nominal performance above highest\n", cpu);
665+
return 0;
666+
}
667+
668+
/* Scale the numerator first so the integer division keeps the fraction. */
return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
669+
}
670+
#else
671+
/* Built without CONFIG_ACPI_CPPC_LIB: always return 0, i.e. no boost ratio. */
static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; }
672+
#endif
673+
631674
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
632675
{
633-
unsigned int i;
634-
unsigned int valid_states = 0;
635-
unsigned int cpu = policy->cpu;
676+
struct cpufreq_frequency_table *freq_table;
677+
struct acpi_processor_performance *perf;
636678
struct acpi_cpufreq_data *data;
679+
unsigned int cpu = policy->cpu;
680+
struct cpuinfo_x86 *c = &cpu_data(cpu);
681+
unsigned int valid_states = 0;
637682
unsigned int result = 0;
638-
struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
639-
struct acpi_processor_performance *perf;
640-
struct cpufreq_frequency_table *freq_table;
683+
unsigned int state_count;
684+
u64 max_boost_ratio;
685+
unsigned int i;
641686
#ifdef CONFIG_SMP
642687
static int blacklisted;
643688
#endif
@@ -750,8 +795,28 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
750795
goto err_unreg;
751796
}
752797

753-
freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
754-
GFP_KERNEL);
798+
state_count = perf->state_count + 1;
799+
800+
max_boost_ratio = get_max_boost_ratio(cpu);
801+
if (max_boost_ratio) {
802+
/*
803+
* Make a room for one more entry to represent the highest
804+
* available "boost" frequency.
805+
*/
806+
state_count++;
807+
valid_states++;
808+
data->first_perf_state = valid_states;
809+
} else {
810+
/*
811+
* If the maximum "boost" frequency is unknown, ask the arch
812+
* scale-invariance code to use the "nominal" performance for
813+
* CPU utilization scaling so as to prevent the schedutil
814+
* governor from selecting inadequate CPU frequencies.
815+
*/
816+
arch_set_max_freq_ratio(true);
817+
}
818+
819+
freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
755820
if (!freq_table) {
756821
result = -ENOMEM;
757822
goto err_unreg;
@@ -785,6 +850,30 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
785850
valid_states++;
786851
}
787852
freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
853+
854+
if (max_boost_ratio) {
855+
unsigned int state = data->first_perf_state;
856+
unsigned int freq = freq_table[state].frequency;
857+
858+
/*
859+
* Because the loop above sorts the freq_table entries in the
860+
* descending order, freq is the maximum frequency in the table.
861+
* Assume that it corresponds to the CPPC nominal frequency and
862+
* use it to populate the frequency field of the extra "boost"
863+
* frequency entry.
864+
*/
865+
freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
866+
/*
867+
* The purpose of the extra "boost" frequency entry is to make
868+
* the rest of cpufreq aware of the real maximum frequency, but
869+
* the way to request it is the same as for the first_perf_state
870+
* entry that is expected to cover the entire range of "boost"
871+
* frequencies of the CPU, so copy the driver_data value from
872+
* that entry.
873+
*/
874+
freq_table[0].driver_data = freq_table[state].driver_data;
875+
}
876+
788877
policy->freq_table = freq_table;
789878
perf->state = 0;
790879

@@ -858,8 +947,10 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
858947
{
859948
struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
860949
policy->cpu);
950+
struct acpi_cpufreq_data *data = policy->driver_data;
951+
unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
861952

862-
if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
953+
if (perf->states[0].core_frequency * 1000 != freq)
863954
pr_warn(FW_WARN "P-state 0 is not max freq\n");
864955
}
865956

0 commit comments

Comments
 (0)