Skip to content

Commit 976aa63

Browse files
committed
Merge tag 'pm-6.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki: "These revert an x86 commit that introduced a nasty power regression on some systems, fix PSCI cpuidle driver and ACPI cpufreq driver regressions, add Rust abstractions for cpufreq, OPP, clk, and cpumasks, add a Rust-based cpufreq-dt driver, and do a minor SCMI cpufreq driver cleanup: - Revert an x86 commit that went into 6.15 and caused idle power, including power in suspend-to-idle, to rise rather dramatically on systems booting with "nosmt" in the kernel command line (Rafael Wysocki) - Prevent freeing an uninitialized pointer in error path of dt_idle_state_present() in the PSCI cpuidle driver (Dan Carpenter) - Use KHz as the nominal_freq units in get_max_boost_ratio() in the ACPI cpufreq driver (Gautham Shenoy) - Add Rust abstractions for CPUFreq framework (Viresh Kumar) - Add Rust abstractions for OPP framework (Viresh Kumar) - Add basic Rust abstractions for Clk and Cpumask frameworks (Viresh Kumar) - Clean up the SCMI cpufreq driver somewhat (Mike Tipton)" * tag 'pm-6.16-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (21 commits) Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" acpi-cpufreq: Fix nominal_freq units to KHz in get_max_boost_ratio() rust: opp: Move `cfg(CONFIG_OF)` attribute to the top of doc test cpuidle: psci: Fix uninitialized variable in dt_idle_state_present() rust: opp: Make the doctest example depend on CONFIG_OF cpufreq: scmi: Skip SCMI devices that aren't used by the CPUs cpufreq: Add Rust-based cpufreq-dt driver rust: opp: Extend OPP abstractions with cpufreq support rust: cpufreq: Extend abstractions for driver registration rust: cpufreq: Extend abstractions for policy and driver ops rust: cpufreq: Add initial abstractions for cpufreq framework rust: opp: Add abstractions for the configuration options rust: opp: Add abstractions for the OPP table rust: opp: Add initial abstractions for OPP framework rust: cpu: Add from_cpu() rust: macros: enable
use of hyphens in module names rust: clk: Add initial abstractions rust: clk: Add helpers for Rust code MAINTAINERS: Add entry for Rust cpumask API rust: cpumask: Add initial abstractions ...
2 parents 8477ab1 + 3d031d0 commit 976aa63

File tree

20 files changed

+3624
-22
lines changed

20 files changed

+3624
-22
lines changed

MAINTAINERS

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5952,6 +5952,8 @@ F: include/dt-bindings/clock/
59525952
F: include/linux/clk-pr*
59535953
F: include/linux/clk/
59545954
F: include/linux/of_clk.h
5955+
F: rust/helpers/clk.c
5956+
F: rust/kernel/clk.rs
59555957
X: drivers/clk/clkdev.c
59565958

59575959
COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3)
@@ -6211,6 +6213,7 @@ F: drivers/cpufreq/
62116213
F: include/linux/cpufreq.h
62126214
F: include/linux/sched/cpufreq.h
62136215
F: kernel/sched/cpufreq*.c
6216+
F: rust/kernel/cpufreq.rs
62146217
F: tools/testing/selftests/cpufreq/
62156218

62166219
CPU HOTPLUG
@@ -6224,6 +6227,7 @@ F: include/linux/cpuhotplug.h
62246227
F: include/linux/smpboot.h
62256228
F: kernel/cpu.c
62266229
F: kernel/smpboot.*
6230+
F: rust/kernel/cpu.rs
62276231

62286232
CPU IDLE TIME MANAGEMENT FRAMEWORK
62296233
M: "Rafael J. Wysocki" <[email protected]>
@@ -6308,6 +6312,12 @@ L: [email protected]
63086312
S: Maintained
63096313
F: drivers/cpuidle/cpuidle-riscv-sbi.c
63106314

6315+
CPUMASK API [RUST]
6316+
M: Viresh Kumar <[email protected]>
6317+
R: Yury Norov <[email protected]>
6318+
S: Maintained
6319+
F: rust/kernel/cpumask.rs
6320+
63116321
CRAMFS FILESYSTEM
63126322
M: Nicolas Pitre <[email protected]>
63136323
S: Maintained
@@ -18514,6 +18524,7 @@ F: Documentation/devicetree/bindings/opp/
1851418524
F: Documentation/power/opp.rst
1851518525
F: drivers/opp/
1851618526
F: include/linux/pm_opp.h
18527+
F: rust/kernel/opp.rs
1851718528

1851818529
OPL4 DRIVER
1851918530
M: Clemens Ladisch <[email protected]>

arch/x86/kernel/smpboot.c

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,10 +1244,6 @@ void play_dead_common(void)
12441244
local_irq_disable();
12451245
}
12461246

1247-
/*
1248-
* We need to flush the caches before going to sleep, lest we have
1249-
* dirty data in our caches when we come back up.
1250-
*/
12511247
void __noreturn mwait_play_dead(unsigned int eax_hint)
12521248
{
12531249
struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
@@ -1293,6 +1289,50 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
12931289
}
12941290
}
12951291

1292+
/*
1293+
* We need to flush the caches before going to sleep, lest we have
1294+
* dirty data in our caches when we come back up.
1295+
*/
1296+
static inline void mwait_play_dead_cpuid_hint(void)
1297+
{
1298+
unsigned int eax, ebx, ecx, edx;
1299+
unsigned int highest_cstate = 0;
1300+
unsigned int highest_subcstate = 0;
1301+
int i;
1302+
1303+
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
1304+
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
1305+
return;
1306+
if (!this_cpu_has(X86_FEATURE_MWAIT))
1307+
return;
1308+
if (!this_cpu_has(X86_FEATURE_CLFLUSH))
1309+
return;
1310+
1311+
eax = CPUID_LEAF_MWAIT;
1312+
ecx = 0;
1313+
native_cpuid(&eax, &ebx, &ecx, &edx);
1314+
1315+
/*
1316+
* eax will be 0 if EDX enumeration is not valid.
1317+
* Initialized below to cstate, sub_cstate value when EDX is valid.
1318+
*/
1319+
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1320+
eax = 0;
1321+
} else {
1322+
edx >>= MWAIT_SUBSTATE_SIZE;
1323+
for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1324+
if (edx & MWAIT_SUBSTATE_MASK) {
1325+
highest_cstate = i;
1326+
highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1327+
}
1328+
}
1329+
eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1330+
(highest_subcstate - 1);
1331+
}
1332+
1333+
mwait_play_dead(eax);
1334+
}
1335+
12961336
/*
12971337
* Kick all "offline" CPUs out of mwait on kexec(). See comment in
12981338
* mwait_play_dead().
@@ -1343,9 +1383,9 @@ void native_play_dead(void)
13431383
play_dead_common();
13441384
tboot_shutdown(TB_SHUTDOWN_WFS);
13451385

1346-
/* Below returns only on error. */
1347-
cpuidle_play_dead();
1348-
hlt_play_dead();
1386+
mwait_play_dead_cpuid_hint();
1387+
if (cpuidle_play_dead())
1388+
hlt_play_dead();
13491389
}
13501390

13511391
#else /* ... !CONFIG_HOTPLUG_CPU */

drivers/cpufreq/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,18 @@ config CPUFREQ_DT
217217

218218
If in doubt, say N.
219219

220+
config CPUFREQ_DT_RUST
221+
tristate "Rust based Generic DT based cpufreq driver"
222+
depends on HAVE_CLK && OF && RUST
223+
select CPUFREQ_DT_PLATDEV
224+
select PM_OPP
225+
help
226+
This adds a Rust based generic DT based cpufreq driver for frequency
227+
management. It supports both uniprocessor (UP) and symmetric
228+
multiprocessor (SMP) systems.
229+
230+
If in doubt, say N.
231+
220232
config CPUFREQ_VIRT
221233
tristate "Virtual cpufreq driver"
222234
depends on GENERIC_ARCH_TOPOLOGY

drivers/cpufreq/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o
1515
obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
1616

1717
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
18+
obj-$(CONFIG_CPUFREQ_DT_RUST) += rcpufreq_dt.o
1819
obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
1920
obj-$(CONFIG_CPUFREQ_VIRT) += virtual-cpufreq.o
2021

drivers/cpufreq/acpi-cpufreq.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq)
660660
nominal_perf = perf_caps.nominal_perf;
661661

662662
if (nominal_freq)
663-
*nominal_freq = perf_caps.nominal_freq;
663+
*nominal_freq = perf_caps.nominal_freq * 1000;
664664

665665
if (!highest_perf || !nominal_perf) {
666666
pr_debug("CPU%d: highest or nominal performance missing\n", cpu);

drivers/cpufreq/rcpufreq_dt.rs

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
//! Rust based implementation of the cpufreq-dt driver.
4+
5+
use kernel::{
6+
c_str,
7+
clk::Clk,
8+
cpu, cpufreq,
9+
cpumask::CpumaskVar,
10+
device::{Core, Device},
11+
error::code::*,
12+
fmt,
13+
macros::vtable,
14+
module_platform_driver, of, opp, platform,
15+
prelude::*,
16+
str::CString,
17+
sync::Arc,
18+
};
19+
20+
/// Finds exact supply name from the OF node.
21+
fn find_supply_name_exact(dev: &Device, name: &str) -> Option<CString> {
22+
let prop_name = CString::try_from_fmt(fmt!("{}-supply", name)).ok()?;
23+
dev.property_present(&prop_name)
24+
.then(|| CString::try_from_fmt(fmt!("{name}")).ok())
25+
.flatten()
26+
}
27+
28+
/// Finds supply name for the CPU from DT.
29+
fn find_supply_names(dev: &Device, cpu: u32) -> Option<KVec<CString>> {
30+
// Try "cpu0" for older DTs, fallback to "cpu".
31+
let name = (cpu == 0)
32+
.then(|| find_supply_name_exact(dev, "cpu0"))
33+
.flatten()
34+
.or_else(|| find_supply_name_exact(dev, "cpu"))?;
35+
36+
let mut list = KVec::with_capacity(1, GFP_KERNEL).ok()?;
37+
list.push(name, GFP_KERNEL).ok()?;
38+
39+
Some(list)
40+
}
41+
42+
/// Represents the cpufreq dt device.
///
/// One instance is built by `init()` and returned as the driver's per-policy data
/// (`PData = Arc<CPUFreqDTDevice>`).
43+
struct CPUFreqDTDevice {
44+
/// OPP table for the policy; `target_index()` calls `set_rate()` on it.
opp_table: opp::Table,
45+
/// Frequency table obtained from `opp_table.cpufreq_table()`; it is handed to the policy in
/// `init()` and used by `target_index()` to turn a table index into a frequency.
freq_table: opp::FreqTable,
46+
/// CPU mask computed in `init()`. It is not read again in this file; presumably it is stored
/// only to keep it alive alongside the policy (TODO confirm).
_mask: CpumaskVar,
47+
/// Token returned by applying the OPP regulator-name configuration; `None` when
/// `find_supply_names()` found no supply.
_token: Option<opp::ConfigToken>,
48+
/// Clock returned by `policy.set_clk()`; per the SAFETY note in `init()` it must not be
/// dropped while the C code is using it.
_clk: Clk,
49+
}
50+
51+
/// Driver type for the Rust cpufreq-dt driver.
///
/// This unit struct carries no state; the `opp::ConfigOps`, `cpufreq::Driver` and
/// `platform::Driver` implementations in this file are all attached to it.
#[derive(Default)]
52+
struct CPUFreqDTDriver;
53+
54+
// No `opp::ConfigOps` items are overridden: the impl body is empty.
#[vtable]
55+
impl opp::ConfigOps for CPUFreqDTDriver {}
56+
57+
#[vtable]
58+
impl cpufreq::Driver for CPUFreqDTDriver {
59+
const NAME: &'static CStr = c_str!("cpufreq-dt");
60+
const FLAGS: u16 = cpufreq::flags::NEED_INITIAL_FREQ_CHECK | cpufreq::flags::IS_COOLING_DEV;
61+
const BOOST_ENABLED: bool = true;
62+
63+
type PData = Arc<CPUFreqDTDevice>;
64+
65+
fn init(policy: &mut cpufreq::Policy) -> Result<Self::PData> {
66+
let cpu = policy.cpu();
67+
// SAFETY: The CPU device is only used during init; it won't get hot-unplugged. The cpufreq
68+
// core registers with CPU notifiers and the cpufreq core/driver won't use the CPU device,
69+
// once the CPU is hot-unplugged.
70+
let dev = unsafe { cpu::from_cpu(cpu)? };
71+
let mut mask = CpumaskVar::new_zero(GFP_KERNEL)?;
72+
73+
mask.set(cpu);
74+
75+
let token = find_supply_names(dev, cpu)
76+
.map(|names| {
77+
opp::Config::<Self>::new()
78+
.set_regulator_names(names)?
79+
.set(dev)
80+
})
81+
.transpose()?;
82+
83+
// Get OPP-sharing information from "operating-points-v2" bindings.
84+
let fallback = match opp::Table::of_sharing_cpus(dev, &mut mask) {
85+
Ok(()) => false,
86+
Err(e) if e == ENOENT => {
87+
// "operating-points-v2" not supported. If the platform hasn't
88+
// set sharing CPUs, fallback to all CPUs share the `Policy`
89+
// for backward compatibility.
90+
opp::Table::sharing_cpus(dev, &mut mask).is_err()
91+
}
92+
Err(e) => return Err(e),
93+
};
94+
95+
// Initialize OPP tables for all policy cpus.
96+
//
97+
// For platforms not using "operating-points-v2" bindings, we do this
98+
// before updating policy cpus. Otherwise, we will end up creating
99+
// duplicate OPPs for the CPUs.
100+
//
101+
// OPPs might be populated at runtime, don't fail for error here unless
102+
// it is -EPROBE_DEFER.
103+
let mut opp_table = match opp::Table::from_of_cpumask(dev, &mut mask) {
104+
Ok(table) => table,
105+
Err(e) => {
106+
if e == EPROBE_DEFER {
107+
return Err(e);
108+
}
109+
110+
// The table is added dynamically ?
111+
opp::Table::from_dev(dev)?
112+
}
113+
};
114+
115+
// The OPP table must be initialized, statically or dynamically, by this point.
116+
opp_table.opp_count()?;
117+
118+
// Set sharing cpus for fallback scenario.
119+
if fallback {
120+
mask.setall();
121+
opp_table.set_sharing_cpus(&mut mask)?;
122+
}
123+
124+
let mut transition_latency = opp_table.max_transition_latency_ns() as u32;
125+
if transition_latency == 0 {
126+
transition_latency = cpufreq::ETERNAL_LATENCY_NS;
127+
}
128+
129+
policy
130+
.set_dvfs_possible_from_any_cpu(true)
131+
.set_suspend_freq(opp_table.suspend_freq())
132+
.set_transition_latency_ns(transition_latency);
133+
134+
let freq_table = opp_table.cpufreq_table()?;
135+
// SAFETY: The `freq_table` is not dropped while it is getting used by the C code.
136+
unsafe { policy.set_freq_table(&freq_table) };
137+
138+
// SAFETY: The returned `clk` is not dropped while it is getting used by the C code.
139+
let clk = unsafe { policy.set_clk(dev, None)? };
140+
141+
mask.copy(policy.cpus());
142+
143+
Ok(Arc::new(
144+
CPUFreqDTDevice {
145+
opp_table,
146+
freq_table,
147+
_mask: mask,
148+
_token: token,
149+
_clk: clk,
150+
},
151+
GFP_KERNEL,
152+
)?)
153+
}
154+
155+
fn exit(_policy: &mut cpufreq::Policy, _data: Option<Self::PData>) -> Result {
156+
Ok(())
157+
}
158+
159+
fn online(_policy: &mut cpufreq::Policy) -> Result {
160+
// We did light-weight tear down earlier, nothing to do here.
161+
Ok(())
162+
}
163+
164+
fn offline(_policy: &mut cpufreq::Policy) -> Result {
165+
// Preserve policy->data and don't free resources on light-weight
166+
// tear down.
167+
Ok(())
168+
}
169+
170+
fn suspend(policy: &mut cpufreq::Policy) -> Result {
171+
policy.generic_suspend()
172+
}
173+
174+
fn verify(data: &mut cpufreq::PolicyData) -> Result {
175+
data.generic_verify()
176+
}
177+
178+
fn target_index(policy: &mut cpufreq::Policy, index: cpufreq::TableIndex) -> Result {
179+
let Some(data) = policy.data::<Self::PData>() else {
180+
return Err(ENOENT);
181+
};
182+
183+
let freq = data.freq_table.freq(index)?;
184+
data.opp_table.set_rate(freq)
185+
}
186+
187+
fn get(policy: &mut cpufreq::Policy) -> Result<u32> {
188+
policy.generic_get()
189+
}
190+
191+
fn set_boost(_policy: &mut cpufreq::Policy, _state: i32) -> Result {
192+
Ok(())
193+
}
194+
195+
fn register_em(policy: &mut cpufreq::Policy) {
196+
policy.register_em_opp()
197+
}
198+
}
199+
200+
// Open Firmware match table for the driver: a single entry for the "operating-points-v2"
// compatible string, carrying `()` as its per-entry data.
kernel::of_device_table!(
201+
OF_TABLE,
202+
MODULE_OF_TABLE,
203+
<CPUFreqDTDriver as platform::Driver>::IdInfo,
204+
[(of::DeviceId::new(c_str!("operating-points-v2")), ())]
205+
);
206+
207+
impl platform::Driver for CPUFreqDTDriver {
208+
type IdInfo = ();
209+
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
210+
211+
fn probe(
212+
pdev: &platform::Device<Core>,
213+
_id_info: Option<&Self::IdInfo>,
214+
) -> Result<Pin<KBox<Self>>> {
215+
cpufreq::Registration::<CPUFreqDTDriver>::new_foreign_owned(pdev.as_ref())?;
216+
Ok(KBox::new(Self {}, GFP_KERNEL)?.into())
217+
}
218+
}
219+
220+
// Module declaration: names `CPUFreqDTDriver` as the platform driver type and supplies the
// module's name, author, description and license metadata.
module_platform_driver! {
221+
type: CPUFreqDTDriver,
222+
name: "cpufreq-dt",
223+
author: "Viresh Kumar <[email protected]>",
224+
description: "Generic CPUFreq DT driver",
225+
license: "GPL v2",
226+
}

0 commit comments

Comments
 (0)