Skip to content

Commit de9a6c8

Browse files
ij-intelbjorn-helgaas
authored andcommitted
PCI/bwctrl: Add pcie_set_target_speed() to set PCIe Link Speed
Currently, PCIe Link Speeds are adjusted by custom code rather than in a common function provided in PCI core. The PCIe bandwidth controller (bwctrl) introduces an in-kernel API, pcie_set_target_speed(), to set PCIe Link Speed. Convert the Target Speed quirk to use the new API. The Target Speed quirk runs very early when bwctrl is not yet probed for a Port and can also run later when bwctrl is already set up for the Port, which requires the per-Port mutex (set_speed_mutex) to be taken only if the bwctrl setup is already complete. The new API is also intended to be used in an upcoming commit that adds a thermal cooling device to throttle PCIe bandwidth when thermal thresholds are reached. The PCIe bandwidth control procedure is as follows. The highest speed supported by both the Port and the PCIe device which is not higher than the requested speed is selected and written into the Target Link Speed field in the Link Control 2 Register. Then the bandwidth controller retrains the PCIe Link. Bandwidth Notifications enable the cur_bus_speed in the struct pci_bus to keep track of PCIe Link Speed changes. While Bandwidth Notifications should also be generated when the bandwidth controller alters the PCIe Link Speed, a few platforms do not deliver the LBMS interrupt after Link Training as expected. Thus, after changing the Link Speed, the bandwidth controller makes an additional read of the Link Status Register to ensure cur_bus_speed is consistent with the new PCIe Link Speed. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Ilpo Järvinen <[email protected]> [bhelgaas: squash devm_mutex_init() error checking from https://lore.kernel.org/r/[email protected], drop export of pcie_set_target_speed()] Signed-off-by: Bjorn Helgaas <[email protected]> Reviewed-by: Jonathan Cameron <[email protected]>
1 parent 665745f commit de9a6c8

File tree

4 files changed

+208
-19
lines changed

4 files changed

+208
-19
lines changed

drivers/pci/pci.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,17 @@ void pci_disable_bridge_window(struct pci_dev *dev);
331331
struct pci_bus *pci_bus_get(struct pci_bus *bus);
332332
void pci_bus_put(struct pci_bus *bus);
333333

334+
/*
 * Convert the PCI_EXP_LNKCAP_SLS field of a Link Capabilities value to
 * enum pci_bus_speed.
 *
 * The argument may be the raw register value: bits outside
 * PCI_EXP_LNKCAP_SLS are masked off before the comparison.  Without the
 * mask, a caller passing the whole register (as the Target Speed quirk
 * does) would always get PCI_SPEED_UNKNOWN.
 */
#define PCIE_LNKCAP_SLS2SPEED(lnkcap)					\
({									\
	u32 lnkcap_sls = (lnkcap) & PCI_EXP_LNKCAP_SLS;			\
									\
	(lnkcap_sls == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT :	\
	 lnkcap_sls == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT :	\
	 lnkcap_sls == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT :	\
	 lnkcap_sls == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT :	\
	 lnkcap_sls == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT :	\
	 lnkcap_sls == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT :	\
	 PCI_SPEED_UNKNOWN);						\
})
344+
334345
/* PCIe link information from Link Capabilities 2 */
335346
#define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \
336347
((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \
@@ -341,6 +352,15 @@ void pci_bus_put(struct pci_bus *bus);
341352
(lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \
342353
PCI_SPEED_UNKNOWN)
343354

355+
/*
 * Convert the PCI_EXP_LNKCTL2_TLS field of a Link Control 2 value to
 * enum pci_bus_speed.
 *
 * The argument may be the raw register value: bits outside
 * PCI_EXP_LNKCTL2_TLS are masked off before the comparison, so a saved
 * copy of the whole register can be passed directly.
 */
#define PCIE_LNKCTL2_TLS2SPEED(lnkctl2)					\
({									\
	u16 lnkctl2_tls = (lnkctl2) & PCI_EXP_LNKCTL2_TLS;		\
									\
	(lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT :	\
	 lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT :	\
	 lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT :	\
	 lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT :	\
	 lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT :	\
	 lnkctl2_tls == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT :	\
	 PCI_SPEED_UNKNOWN);						\
})
363+
344364
/* PCIe speed to Mb/s reduced by encoding overhead */
345365
#define PCIE_SPEED2MBS_ENC(speed) \
346366
((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \

drivers/pci/pcie/bwctrl.c

Lines changed: 174 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
* Copyright (C) 2019 Dell Inc
88
* Copyright (C) 2023-2024 Intel Corporation
99
*
10+
* The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
11+
* and notify the operating system when the Link Width or Speed changes. The
12+
* notification capability is required for all Root Ports and Downstream
13+
* Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
14+
*
1015
* This service port driver hooks into the Bandwidth Notification interrupt
1116
* watching for changes or links becoming degraded in operation. It updates
1217
* the cached Current Link Speed that is exposed to user space through sysfs.
@@ -15,9 +20,12 @@
1520
#define dev_fmt(fmt) "bwctrl: " fmt
1621

1722
#include <linux/atomic.h>
23+
#include <linux/bitops.h>
24+
#include <linux/bits.h>
1825
#include <linux/cleanup.h>
1926
#include <linux/errno.h>
2027
#include <linux/interrupt.h>
28+
#include <linux/mutex.h>
2129
#include <linux/pci.h>
2230
#include <linux/rwsem.h>
2331
#include <linux/slab.h>
@@ -28,14 +36,166 @@
2836

2937
/**
3038
* struct pcie_bwctrl_data - PCIe bandwidth controller
39+
* @set_speed_mutex: Serializes link speed changes
3140
* @lbms_count: Count for LBMS (since last reset)
3241
*/
3342
struct pcie_bwctrl_data {
43+
struct mutex set_speed_mutex;
3444
atomic_t lbms_count;
3545
};
3646

37-
/* Prevents port removal during LBMS count accessors */
47+
/*
48+
* Prevent port removal during LBMS count accessors and Link Speed changes.
49+
*
50+
* These have to be differentiated because pcie_bwctrl_change_speed() calls
51+
* pcie_retrain_link() which uses LBMS count reset accessor on success
52+
* (using just one rwsem triggers "possible recursive locking detected"
53+
* warning).
54+
*/
3855
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
56+
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
57+
58+
static bool pcie_valid_speed(enum pci_bus_speed speed)
59+
{
60+
return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
61+
}
62+
63+
static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
64+
{
65+
static const u8 speed_conv[] = {
66+
[PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
67+
[PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
68+
[PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
69+
[PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
70+
[PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
71+
[PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
72+
};
73+
74+
if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
75+
return 0;
76+
77+
return speed_conv[speed];
78+
}
79+
80+
/*
 * Reduce a bit mask of Link Speeds to a Target Link Speed value by taking
 * the position of its most significant set bit.
 *
 * @supported_speeds must be non-zero, as required by __fls().
 */
static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
{
	u16 highest = __fls(supported_speeds);

	return highest;
}
84+
85+
/**
86+
* pcie_bwctrl_select_speed - Select Target Link Speed
87+
* @port: PCIe Port
88+
* @speed_req: Requested PCIe Link Speed
89+
*
90+
* Select Target Link Speed by take into account Supported Link Speeds of
91+
* both the Root Port and the Endpoint.
92+
*
93+
* Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
94+
*/
95+
static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
96+
{
97+
struct pci_bus *bus = port->subordinate;
98+
u8 desired_speeds, supported_speeds;
99+
struct pci_dev *dev;
100+
101+
desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
102+
__fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
103+
104+
supported_speeds = port->supported_speeds;
105+
if (bus) {
106+
down_read(&pci_bus_sem);
107+
dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
108+
if (dev)
109+
supported_speeds &= dev->supported_speeds;
110+
up_read(&pci_bus_sem);
111+
}
112+
if (!supported_speeds)
113+
return PCI_EXP_LNKCAP2_SLS_2_5GB;
114+
115+
return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
116+
}
117+
118+
/*
 * Write @target_speed into the Target Link Speed field of the Link
 * Control 2 Register of @port and retrain the Link.
 *
 * Returns 0 on success or a negative errno when the config write or the
 * retraining fails.
 */
static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
{
	int err;

	err = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
						 PCI_EXP_LNKCTL2_TLS, target_speed);
	if (err != PCIBIOS_SUCCESSFUL)
		return pcibios_err_to_errno(err);

	err = pcie_retrain_link(port, use_lt);
	if (err < 0)
		return err;

	if (!port->subordinate)
		return 0;

	/*
	 * Some platforms have problems with notifications, so refresh the
	 * link speed explicitly instead of relying on them.
	 */
	pcie_update_link_speed(port->subordinate);

	return 0;
}
140+
141+
/**
142+
* pcie_set_target_speed - Set downstream Link Speed for PCIe Port
143+
* @port: PCIe Port
144+
* @speed_req: Requested PCIe Link Speed
145+
* @use_lt: Wait for the LT or DLLLA bit to detect the end of link training
146+
*
147+
* Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
148+
* adjusted downwards to the best speed supported by both the Port and PCIe
149+
* Device underneath it.
150+
*
151+
* Return:
152+
* * 0 - on success
153+
* * -EINVAL - @speed_req is not a PCIe Link Speed
154+
* * -ENODEV - @port is not controllable
155+
* * -ETIMEDOUT - changing Link Speed took too long
156+
* * -EAGAIN - Link Speed was changed but @speed_req was not achieved
157+
*/
158+
int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
159+
bool use_lt)
160+
{
161+
struct pci_bus *bus = port->subordinate;
162+
u16 target_speed;
163+
int ret;
164+
165+
if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
166+
return -EINVAL;
167+
168+
if (bus && bus->cur_bus_speed == speed_req)
169+
return 0;
170+
171+
target_speed = pcie_bwctrl_select_speed(port, speed_req);
172+
173+
scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
174+
struct pcie_bwctrl_data *data = port->link_bwctrl;
175+
176+
/*
177+
* port->link_bwctrl is NULL during initial scan when called
178+
* e.g. from the Target Speed quirk.
179+
*/
180+
if (data)
181+
mutex_lock(&data->set_speed_mutex);
182+
183+
ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
184+
185+
if (data)
186+
mutex_unlock(&data->set_speed_mutex);
187+
}
188+
189+
/*
190+
* Despite setting higher speed into the Target Link Speed, empty
191+
* bus won't train to 5GT+ speeds.
192+
*/
193+
if (!ret && bus && bus->cur_bus_speed != speed_req &&
194+
!list_empty(&bus->devices))
195+
ret = -EAGAIN;
196+
197+
return ret;
198+
}
39199

40200
static void pcie_bwnotif_enable(struct pcie_device *srv)
41201
{
@@ -136,14 +296,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
136296
if (!data)
137297
return -ENOMEM;
138298

299+
ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
300+
if (ret)
301+
return ret;
302+
139303
ret = devm_request_irq(&srv->device, srv->irq, pcie_bwnotif_irq,
140304
IRQF_SHARED, "PCIe bwctrl", srv);
141305
if (ret)
142306
return ret;
143307

144-
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
145-
port->link_bwctrl = no_free_ptr(data);
146-
pcie_bwnotif_enable(srv);
308+
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
309+
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
310+
port->link_bwctrl = no_free_ptr(data);
311+
pcie_bwnotif_enable(srv);
312+
}
147313
}
148314

149315
pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
@@ -154,8 +320,10 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
154320
static void pcie_bwnotif_remove(struct pcie_device *srv)
155321
{
156322
pcie_bwnotif_disable(srv->port);
157-
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem)
158-
srv->port->link_bwctrl = NULL;
323+
324+
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem)
325+
scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem)
326+
srv->port->link_bwctrl = NULL;
159327
}
160328

161329
static int pcie_bwnotif_suspend(struct pcie_device *srv)

drivers/pci/quirks.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,16 +113,11 @@ int pcie_failed_link_retrain(struct pci_dev *dev)
113113

114114
pci_info(dev, "broken device, retraining non-functional downstream link at 2.5GT/s\n");
115115

116-
lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
117-
lnkctl2 |= PCI_EXP_LNKCTL2_TLS_2_5GT;
118-
pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
119-
120-
ret = pcie_retrain_link(dev, false);
116+
ret = pcie_set_target_speed(dev, PCIE_SPEED_2_5GT, false);
121117
if (ret) {
122118
pci_info(dev, "retraining failed\n");
123-
pcie_capability_write_word(dev, PCI_EXP_LNKCTL2,
124-
oldlnkctl2);
125-
pcie_retrain_link(dev, true);
119+
pcie_set_target_speed(dev, PCIE_LNKCTL2_TLS2SPEED(oldlnkctl2),
120+
true);
126121
return ret;
127122
}
128123

@@ -136,11 +131,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev)
136131

137132
pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n");
138133
pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
139-
lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
140-
lnkctl2 |= lnkcap & PCI_EXP_LNKCAP_SLS;
141-
pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, lnkctl2);
142-
143-
ret = pcie_retrain_link(dev, false);
134+
ret = pcie_set_target_speed(dev, PCIE_LNKCAP_SLS2SPEED(lnkcap), false);
144135
if (ret) {
145136
pci_info(dev, "retraining failed\n");
146137
return ret;

include/linux/pci.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,9 +1797,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d,
17971797
#ifdef CONFIG_PCIEPORTBUS
17981798
extern bool pcie_ports_disabled;
17991799
extern bool pcie_ports_native;
1800+
1801+
int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
1802+
bool use_lt);
18001803
#else
18011804
#define pcie_ports_disabled true
18021805
#define pcie_ports_native false
1806+
1807+
static inline int pcie_set_target_speed(struct pci_dev *port,
1808+
enum pci_bus_speed speed_req,
1809+
bool use_lt)
1810+
{
1811+
return -EOPNOTSUPP;
1812+
}
18031813
#endif
18041814

18051815
#define PCIE_LINK_STATE_L0S (BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */

0 commit comments

Comments
 (0)