Skip to content

Commit 7cd427e

Browse files
committed
Merge branch 'mlxsw-Expose-transceiver-overheat-counter'
Ido Schimmel says:

====================
mlxsw: Expose transceiver overheat counter

Amit says:

An overheated transceiver can be the root cause of various network problems such as link flapping. Counting the number of times a transceiver's temperature was higher than its configured threshold can therefore help in debugging such issues.

This patch set exposes a transceiver overheat counter via ethtool. This is achieved by configuring the Spectrum ASIC to generate events whenever a transceiver is overheated. The temperature thresholds are queried from the transceiver (if available) and set to the default otherwise.

Example:

  ...
  transceiver_overheat: 2

Patch set overview:

Patches #1-#3 add required device registers
Patches #4-#5 add required infrastructure in mlxsw to configure and count overheat events
Patches #6-#9 gradually add support for the transceiver overheat counter
Patch #10 exposes the transceiver overheat counter via ethtool
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 603d11c + 69f6d4e commit 7cd427e

File tree

10 files changed

+660
-5
lines changed

10 files changed

+660
-5
lines changed

drivers/net/ethernet/mellanox/mlxsw/core.c

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include <trace/events/devlink.h>
2727

2828
#include "core.h"
29+
#include "core_env.h"
2930
#include "item.h"
3031
#include "cmd.h"
3132
#include "port.h"
@@ -87,6 +88,8 @@ struct mlxsw_core {
8788
struct {
8889
struct devlink_health_reporter *fw_fatal;
8990
} health;
91+
struct mlxsw_env *env;
92+
bool is_initialized; /* Denotes if core was already initialized. */
9093
unsigned long driver_priv[];
9194
/* driver_priv has to be always the last item */
9295
};
@@ -133,6 +136,11 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
133136
}
134137
EXPORT_SYMBOL(mlxsw_core_res_query_enabled);
135138

139+
/* Report the temp_warn_enabled flag of the driver attached to this core
 * instance. The value is read from mlxsw_core->driver and returned as is.
 */
bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core)
140+
{
141+
return mlxsw_core->driver->temp_warn_enabled;
142+
}
143+
136144
bool
137145
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
138146
const struct mlxsw_fw_rev *req_rev)
@@ -1943,13 +1951,20 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
19431951
if (err)
19441952
goto err_thermal_init;
19451953

1954+
err = mlxsw_env_init(mlxsw_core, &mlxsw_core->env);
1955+
if (err)
1956+
goto err_env_init;
1957+
1958+
mlxsw_core->is_initialized = true;
19461959
devlink_params_publish(devlink);
19471960

19481961
if (!reload)
19491962
devlink_reload_enable(devlink);
19501963

19511964
return 0;
19521965

1966+
err_env_init:
1967+
mlxsw_thermal_fini(mlxsw_core->thermal);
19531968
err_thermal_init:
19541969
mlxsw_hwmon_fini(mlxsw_core->hwmon);
19551970
err_hwmon_init:
@@ -2026,6 +2041,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
20262041
}
20272042

20282043
devlink_params_unpublish(devlink);
2044+
mlxsw_core->is_initialized = false;
2045+
mlxsw_env_fini(mlxsw_core->env);
20292046
mlxsw_thermal_fini(mlxsw_core->thermal);
20302047
mlxsw_hwmon_fini(mlxsw_core->hwmon);
20312048
if (mlxsw_core->driver->fini)
@@ -2829,6 +2846,16 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
28292846
}
28302847
EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);
28312848

2849+
/* Return the mlxsw_env pointer stored in the core instance.
 * NOTE(review): the pointer is presumably populated by mlxsw_env_init()
 * during bus device registration - confirm it cannot be read earlier.
 */
struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core)
2850+
{
2851+
return mlxsw_core->env;
2852+
}
2853+
2854+
/* Return the core's is_initialized flag. In the registration path the flag
 * is set to true only after mlxsw_env_init() succeeds, and the unregister
 * path clears it before calling mlxsw_env_fini().
 */
bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core)
2855+
{
2856+
return mlxsw_core->is_initialized;
2857+
}
2858+
28322859
int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module)
28332860
{
28342861
enum mlxsw_reg_pmtm_module_type module_type;

drivers/net/ethernet/mellanox/mlxsw/core.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
3232

3333
bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);
3434

35+
bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core);
36+
3537
bool
3638
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
3739
const struct mlxsw_fw_rev *req_rev);
@@ -221,6 +223,8 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
221223
struct devlink_port *
222224
mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
223225
u8 local_port);
226+
struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core);
227+
bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core);
224228
int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module);
225229

226230
int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
@@ -371,6 +375,7 @@ struct mlxsw_driver {
371375
const struct mlxsw_config_profile *profile;
372376
bool res_query_enabled;
373377
bool fw_fatal_enabled;
378+
bool temp_warn_enabled;
374379
};
375380

376381
int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,

0 commit comments

Comments
 (0)