Skip to content

Commit a22f3bc

Browse files
idosch authored and kuba-moo committed
mlxsw: core_thermal: Report valid current state during cooling device registration
Commit 31a0fa0 ("thermal/debugfs: Pass cooling device state to thermal_debug_cdev_add()") changed the thermal core to read the current state of the cooling device as part of the cooling device's registration. This is incompatible with the current implementation of the cooling device operations in mlxsw, leading to initialization failure with errors such as:

    mlxsw_spectrum 0000:01:00.0: Failed to register cooling device
    mlxsw_spectrum 0000:01:00.0: cannot register bus device

The reason for the failure is that when the get-current-state operation is invoked, the driver tries to derive the index of the cooling device by walking a per-thermal-zone array and looking for the matching cooling device pointer. However, the pointer is returned from the registration function and is therefore only set in the array after the registration.

The issue was later fixed by commit 1af89de ("thermal: core: Do not fail cdev registration because of invalid initial state") by not failing the registration of the cooling device if it cannot report a valid current state during registration, although drivers are responsible for ensuring that this will not happen.

Therefore, make sure the driver is able to report a valid current state for the cooling device during registration by passing to the registration function per-cooling-device private data that already has the cooling device index populated.

While at it, call thermal_cooling_device_unregister() unconditionally, since the function returns immediately if the cooling device pointer is NULL.

Reviewed-by: Vadim Pasternak <[email protected]>
Signed-off-by: Ido Schimmel <[email protected]>
Signed-off-by: Petr Machata <[email protected]>
Reviewed-by: Przemek Kitszel <[email protected]>
Link: https://patch.msgid.link/c823c4678b6b7afb902c35b3551c81a053afd110.1720447210.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent b45c76e commit a22f3bc

File tree

1 file changed

+25
-26
lines changed

1 file changed

+25
-26
lines changed

drivers/net/ethernet/mellanox/mlxsw/core_thermal.c

Lines changed: 25 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,12 @@ static const struct mlxsw_cooling_states default_cooling_states[] = {
100100

101101
struct mlxsw_thermal;
102102

103+
struct mlxsw_thermal_cooling_device {
104+
struct mlxsw_thermal *thermal;
105+
struct thermal_cooling_device *cdev;
106+
unsigned int idx;
107+
};
108+
103109
struct mlxsw_thermal_module {
104110
struct mlxsw_thermal *parent;
105111
struct thermal_zone_device *tzdev;
@@ -123,7 +129,7 @@ struct mlxsw_thermal {
123129
const struct mlxsw_bus_info *bus_info;
124130
struct thermal_zone_device *tzdev;
125131
int polling_delay;
126-
struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
132+
struct mlxsw_thermal_cooling_device cdevs[MLXSW_MFCR_PWMS_MAX];
127133
struct thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
128134
struct mlxsw_cooling_states cooling_states[MLXSW_THERMAL_NUM_TRIPS];
129135
struct mlxsw_thermal_area line_cards[];
@@ -147,7 +153,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
147153
int i;
148154

149155
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
150-
if (thermal->cdevs[i] == cdev)
156+
if (thermal->cdevs[i].cdev == cdev)
151157
return i;
152158

153159
/* Allow mlxsw thermal zone binding to an external cooling device */
@@ -352,17 +358,14 @@ static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
352358
unsigned long *p_state)
353359

354360
{
355-
struct mlxsw_thermal *thermal = cdev->devdata;
361+
struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
362+
struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
356363
struct device *dev = thermal->bus_info->dev;
357364
char mfsc_pl[MLXSW_REG_MFSC_LEN];
358-
int err, idx;
359365
u8 duty;
366+
int err;
360367

361-
idx = mlxsw_get_cooling_device_idx(thermal, cdev);
362-
if (idx < 0)
363-
return idx;
364-
365-
mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
368+
mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx, 0);
366369
err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
367370
if (err) {
368371
dev_err(dev, "Failed to query PWM duty\n");
@@ -378,22 +381,19 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
378381
unsigned long state)
379382

380383
{
381-
struct mlxsw_thermal *thermal = cdev->devdata;
384+
struct mlxsw_thermal_cooling_device *mlxsw_cdev = cdev->devdata;
385+
struct mlxsw_thermal *thermal = mlxsw_cdev->thermal;
382386
struct device *dev = thermal->bus_info->dev;
383387
char mfsc_pl[MLXSW_REG_MFSC_LEN];
384-
int idx;
385388
int err;
386389

387390
if (state > MLXSW_THERMAL_MAX_STATE)
388391
return -EINVAL;
389392

390-
idx = mlxsw_get_cooling_device_idx(thermal, cdev);
391-
if (idx < 0)
392-
return idx;
393-
394393
/* Normalize the state to the valid speed range. */
395394
state = max_t(unsigned long, MLXSW_THERMAL_MIN_STATE, state);
396-
mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
395+
mlxsw_reg_mfsc_pack(mfsc_pl, mlxsw_cdev->idx,
396+
mlxsw_state_to_duty(state));
397397
err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
398398
if (err) {
399399
dev_err(dev, "Failed to write PWM duty\n");
@@ -753,17 +753,21 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
753753
}
754754
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
755755
if (pwm_active & BIT(i)) {
756+
struct mlxsw_thermal_cooling_device *mlxsw_cdev;
756757
struct thermal_cooling_device *cdev;
757758

759+
mlxsw_cdev = &thermal->cdevs[i];
760+
mlxsw_cdev->thermal = thermal;
761+
mlxsw_cdev->idx = i;
758762
cdev = thermal_cooling_device_register("mlxsw_fan",
759-
thermal,
763+
mlxsw_cdev,
760764
&mlxsw_cooling_ops);
761765
if (IS_ERR(cdev)) {
762766
err = PTR_ERR(cdev);
763767
dev_err(dev, "Failed to register cooling device\n");
764768
goto err_thermal_cooling_device_register;
765769
}
766-
thermal->cdevs[i] = cdev;
770+
mlxsw_cdev->cdev = cdev;
767771
}
768772
}
769773

@@ -824,8 +828,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core,
824828
err_thermal_zone_device_register:
825829
err_thermal_cooling_device_register:
826830
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
827-
if (thermal->cdevs[i])
828-
thermal_cooling_device_unregister(thermal->cdevs[i]);
831+
thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
829832
err_reg_write:
830833
err_reg_query:
831834
kfree(thermal);
@@ -847,12 +850,8 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
847850
thermal->tzdev = NULL;
848851
}
849852

850-
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
851-
if (thermal->cdevs[i]) {
852-
thermal_cooling_device_unregister(thermal->cdevs[i]);
853-
thermal->cdevs[i] = NULL;
854-
}
855-
}
853+
for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
854+
thermal_cooling_device_unregister(thermal->cdevs[i].cdev);
856855

857856
kfree(thermal);
858857
}

0 commit comments

Comments
 (0)