Skip to content

Commit c1fef61

Browse files
Sandipan Patra authored and kuba-moo committed
net/mlx5: Implement thermal zone
Implement thermal zone support for mlx5 based HW. The NIC uses temperature sensor provided by ASIC to report current temperature to thermal core. Signed-off-by: Sandipan Patra <[email protected]> Reviewed-by: Gal Pressman <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent ceefcfb commit c1fef61

File tree

6 files changed

+164
-0
lines changed

6 files changed

+164
-0
lines changed

drivers/net/ethernet/mellanox/mlx5/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
7777

7878
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
7979

80+
mlx5_core-$(CONFIG_THERMAL) += thermal.o
8081
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
8182
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
8283
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o

drivers/net/ethernet/mellanox/mlx5/core/main.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include <linux/version.h>
5353
#include <net/devlink.h>
5454
#include "mlx5_core.h"
55+
#include "thermal.h"
5556
#include "lib/eq.h"
5657
#include "fs_core.h"
5758
#include "lib/mpfs.h"
@@ -1768,6 +1769,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
17681769
if (err)
17691770
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
17701771

1772+
err = mlx5_thermal_init(dev);
1773+
if (err)
1774+
dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err);
1775+
17711776
pci_save_state(pdev);
17721777
devlink_register(devlink);
17731778
return 0;
@@ -1796,6 +1801,7 @@ static void remove_one(struct pci_dev *pdev)
17961801
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
17971802
devlink_unregister(devlink);
17981803
mlx5_sriov_disable(pdev);
1804+
mlx5_thermal_uninit(dev);
17991805
mlx5_crdump_disable(dev);
18001806
mlx5_drain_health_wq(dev);
18011807
mlx5_uninit_one(dev);
drivers/net/ethernet/mellanox/mlx5/core/thermal.c

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2+
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
3+
4+
#include <linux/kernel.h>
5+
#include <linux/types.h>
6+
#include <linux/device.h>
7+
#include <linux/thermal.h>
8+
#include <linux/err.h>
9+
#include <linux/mlx5/driver.h>
10+
#include "mlx5_core.h"
11+
#include "thermal.h"
12+
13+
/* Polling interval in milliseconds; passed as the last argument of
 * thermal_zone_device_register() when the zone is created.
 */
#define MLX5_THERMAL_POLL_INT_MSEC 1000
14+
/* Number of trip points registered for the zone (none). */
#define MLX5_THERMAL_NUM_TRIPS 0
15+
/* Value written to the MTMP sensor_index field when reading the temperature. */
#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0
16+
17+
/* Bit string indicating the writability of trip points, if any.
 * With MLX5_THERMAL_NUM_TRIPS == 0 this is BIT(0) - 1, i.e. 0 with the
 * kernel's usual BIT() definition.
 */
18+
#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1)
19+
20+
/* Per-device thermal state linking an mlx5 core device to its registered
 * thermal zone. Allocated by mlx5_thermal_init(), stored in mdev->thermal
 * and released by mlx5_thermal_uninit().
 */
struct mlx5_thermal {
21+
/* Device whose MTMP register is read by the get_temp callback. */
struct mlx5_core_dev *mdev;
22+
/* Zone handle returned by thermal_zone_device_register(). */
struct thermal_zone_device *tzdev;
23+
};
24+
25+
/* Query the MTMP register for one sensor and hand back its raw reading.
 *
 * @mdev:   device to query
 * @id:     value placed in the sensor_index field of the request
 * @p_temp: receives the unscaled temperature field on success
 *
 * Returns 0 on success, otherwise the error reported by
 * mlx5_core_access_reg(); *p_temp is not written on failure.
 */
static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp)
{
	u32 in[MLX5_ST_SZ_DW(mtmp_reg)] = {};
	u32 out[MLX5_ST_SZ_DW(mtmp_reg)] = {};
	int ret;

	MLX5_SET(mtmp_reg, in, sensor_index, id);

	ret = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out),
				   MLX5_REG_MTMP, 0, 0);
	if (!ret)
		*p_temp = MLX5_GET(mtmp_reg, out, temperature);

	return ret;
}
44+
45+
static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev,
46+
int *p_temp)
47+
{
48+
struct mlx5_thermal *thermal = tzdev->devdata;
49+
struct mlx5_core_dev *mdev = thermal->mdev;
50+
int err;
51+
52+
err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp);
53+
54+
if (err)
55+
return err;
56+
57+
/* The unit of temp returned is in 0.125 C. The thermal
58+
* framework expects the value in 0.001 C.
59+
*/
60+
*p_temp *= 125;
61+
62+
return 0;
63+
}
64+
65+
/* Callbacks handed to thermal_zone_device_register(); only temperature
 * readout is provided.
 */
static struct thermal_zone_device_ops mlx5_thermal_ops = {
66+
.get_temp = mlx5_thermal_get_temp,
67+
};
68+
69+
int mlx5_thermal_init(struct mlx5_core_dev *mdev)
70+
{
71+
struct mlx5_thermal *thermal;
72+
struct thermal_zone_device *tzd;
73+
const char *data = "mlx5";
74+
75+
tzd = thermal_zone_get_zone_by_name(data);
76+
if (!IS_ERR(tzd))
77+
return 0;
78+
79+
thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
80+
if (!thermal)
81+
return -ENOMEM;
82+
83+
thermal->mdev = mdev;
84+
thermal->tzdev = thermal_zone_device_register(data,
85+
MLX5_THERMAL_NUM_TRIPS,
86+
MLX5_THERMAL_TRIP_MASK,
87+
thermal,
88+
&mlx5_thermal_ops,
89+
NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
90+
if (IS_ERR(thermal->tzdev)) {
91+
dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
92+
data, PTR_ERR(thermal->tzdev));
93+
kfree(thermal);
94+
return -EINVAL;
95+
}
96+
97+
mdev->thermal = thermal;
98+
return 0;
99+
}
100+
101+
void mlx5_thermal_uninit(struct mlx5_core_dev *mdev)
102+
{
103+
if (!mdev->thermal)
104+
return;
105+
106+
thermal_zone_device_unregister(mdev->thermal->tzdev);
107+
kfree(mdev->thermal);
108+
}
drivers/net/ethernet/mellanox/mlx5/core/thermal.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2+
* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
3+
*/
4+
#ifndef __MLX5_THERMAL_DRIVER_H
5+
#define __MLX5_THERMAL_DRIVER_H
6+
7+
/* Real implementations are declared only when CONFIG_THERMAL is enabled;
 * otherwise inline stubs are provided so callers need no #ifdefs.
 */
#if IS_ENABLED(CONFIG_THERMAL)
8+
int mlx5_thermal_init(struct mlx5_core_dev *mdev);
9+
void mlx5_thermal_uninit(struct mlx5_core_dev *mdev);
10+
#else
11+
/* Stub: records that no thermal state exists and reports success. */
static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev)
12+
{
13+
mdev->thermal = NULL;
14+
return 0;
15+
}
16+
17+
/* Stub: nothing to tear down. */
static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { }
18+
#endif
19+
20+
#endif /* __MLX5_THERMAL_DRIVER_H */

include/linux/mlx5/driver.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ enum {
134134
MLX5_REG_PCAM = 0x507f,
135135
MLX5_REG_NODE_DESC = 0x6001,
136136
MLX5_REG_HOST_ENDIANNESS = 0x7004,
137+
MLX5_REG_MTMP = 0x900A,
137138
MLX5_REG_MCIA = 0x9014,
138139
MLX5_REG_MFRL = 0x9028,
139140
MLX5_REG_MLCR = 0x902b,
@@ -731,6 +732,7 @@ struct mlx5_fw_tracer;
731732
struct mlx5_vxlan;
732733
struct mlx5_geneve;
733734
struct mlx5_hv_vhca;
735+
struct mlx5_thermal;
734736

735737
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
736738
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -808,6 +810,7 @@ struct mlx5_core_dev {
808810
struct mlx5_rsc_dump *rsc_dump;
809811
u32 vsc_addr;
810812
struct mlx5_hv_vhca *hv_vhca;
813+
struct mlx5_thermal *thermal;
811814
};
812815

813816
struct mlx5_db {

include/linux/mlx5/mlx5_ifc.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10869,6 +10869,31 @@ struct mlx5_ifc_mrtc_reg_bits {
1086910869
u8 time_l[0x20];
1087010870
};
1087110871

10872+
/* MTMP register layout. Array sizes are field widths in bits, and the
 * reserved_at_<offset> names carry the bit offset of the reserved field.
 * The mlx5 thermal code writes sensor_index to select a sensor and reads
 * temperature, which it treats as being in 0.125 C units.
 */
struct mlx5_ifc_mtmp_reg_bits {
10873+
u8 reserved_at_0[0x14];
10874+
u8 sensor_index[0xc];
10875+
10876+
u8 reserved_at_20[0x10];
10877+
u8 temperature[0x10];
10878+
10879+
u8 mte[0x1];
10880+
u8 mtr[0x1];
10881+
u8 reserved_at_42[0xe];
10882+
u8 max_temperature[0x10];
10883+
10884+
u8 tee[0x2];
10885+
u8 reserved_at_62[0xe];
10886+
u8 temp_threshold_hi[0x10];
10887+
10888+
u8 reserved_at_80[0x10];
10889+
u8 temp_threshold_lo[0x10];
10890+
10891+
u8 reserved_at_a0[0x20];
10892+
10893+
u8 sensor_name_hi[0x20];
10894+
u8 sensor_name_lo[0x20];
10895+
};
10896+
1087210897
union mlx5_ifc_ports_control_registers_document_bits {
1087310898
struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
1087410899
struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -10931,6 +10956,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
1093110956
struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
1093210957
struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
1093310958
struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
10959+
struct mlx5_ifc_mtmp_reg_bits mtmp_reg;
1093410960
u8 reserved_at_0[0x60e0];
1093510961
};
1093610962

0 commit comments

Comments
 (0)