Skip to content

Commit 866ca95

Browse files
committed
Merge branch 'nvme-5.5' of git://git.infradead.org/nvme into for-5.5/drivers-post
Pull NVMe changes from Keith:

"- The only new feature is the optional hwmon support for nvme (Guenter and Akinobu)
 - A universal work-around for controllers reading discard payloads beyond the range boundary (Eduard)
 - Chaitanya graciously agreed to share the target driver maintenance"

* 'nvme-5.5' of git://git.infradead.org/nvme:
  nvme: hwmon: add quirk to avoid changing temperature threshold
  nvme: hwmon: provide temperature min and max values for each sensor
  nvmet: add another maintainer
  nvme: Discard workaround for non-conformant devices
  nvme: Add hardware monitoring support
2 parents ad512f2 + 6c6aa2f commit 866ca95

File tree

8 files changed

+307
-4
lines changed

8 files changed

+307
-4
lines changed

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11633,6 +11633,7 @@ F: drivers/nvme/target/fcloop.c
1163311633
NVM EXPRESS TARGET DRIVER
1163411634
M: Christoph Hellwig <[email protected]>
1163511635
M: Sagi Grimberg <[email protected]>
11636+
M: Chaitanya Kulkarni <[email protected]>
1163611637
1163711638
T: git://git.infradead.org/nvme.git
1163811639
W: http://git.infradead.org/nvme.git

drivers/nvme/host/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@ config NVME_MULTIPATH
2323
/dev/nvmeXnY device will show up for each NVMe namespaces,
2424
even if it is accessible through multiple controllers.
2525

26+
config NVME_HWMON
27+
bool "NVMe hardware monitoring"
28+
depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
29+
help
30+
This provides support for NVMe hardware monitoring. If enabled,
31+
a hardware monitoring device will be created for each NVMe drive
32+
in the system.
33+
34+
If unsure, say N.
35+
2636
config NVME_FABRICS
2737
tristate
2838

drivers/nvme/host/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o
1414
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
1515
nvme-core-$(CONFIG_NVM) += lightnvm.o
1616
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
17+
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
1718

1819
nvme-y += pci.o
1920

drivers/nvme/host/core.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -574,8 +574,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
574574
struct nvme_dsm_range *range;
575575
struct bio *bio;
576576

577-
range = kmalloc_array(segments, sizeof(*range),
578-
GFP_ATOMIC | __GFP_NOWARN);
577+
/*
578+
* Some devices do not consider the DSM 'Number of Ranges' field when
579+
* determining how much data to DMA. Always allocate memory for maximum
580+
* number of segments to prevent device reading beyond end of buffer.
581+
*/
582+
static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
583+
584+
range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
579585
if (!range) {
580586
/*
581587
* If we fail allocation our range, fallback to the controller
@@ -615,7 +621,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
615621

616622
req->special_vec.bv_page = virt_to_page(range);
617623
req->special_vec.bv_offset = offset_in_page(range);
618-
req->special_vec.bv_len = sizeof(*range) * segments;
624+
req->special_vec.bv_len = alloc_size;
619625
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
620626

621627
return BLK_STS_OK;
@@ -2760,6 +2766,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
27602766
ctrl->oncs = le16_to_cpu(id->oncs);
27612767
ctrl->mtfa = le16_to_cpu(id->mtfa);
27622768
ctrl->oaes = le32_to_cpu(id->oaes);
2769+
ctrl->wctemp = le16_to_cpu(id->wctemp);
2770+
ctrl->cctemp = le16_to_cpu(id->cctemp);
2771+
27632772
atomic_set(&ctrl->abort_limit, id->acl + 1);
27642773
ctrl->vwc = id->vwc;
27652774
if (id->mdts)
@@ -2859,6 +2868,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
28592868
if (ret < 0)
28602869
return ret;
28612870

2871+
if (!ctrl->identified)
2872+
nvme_hwmon_init(ctrl);
2873+
28622874
ctrl->identified = true;
28632875

28642876
return 0;

drivers/nvme/host/hwmon.c

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* NVM Express hardware monitoring support
4+
* Copyright (c) 2019, Guenter Roeck
5+
*/
6+
7+
#include <linux/hwmon.h>
8+
#include <asm/unaligned.h>
9+
10+
#include "nvme.h"
11+
12+
/* These macros should be moved to linux/temperature.h */
13+
#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000)
14+
#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150)
15+
16+
struct nvme_hwmon_data {
17+
struct nvme_ctrl *ctrl;
18+
struct nvme_smart_log log;
19+
struct mutex read_lock;
20+
};
21+
22+
static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
23+
long *temp)
24+
{
25+
unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
26+
u32 status;
27+
int ret;
28+
29+
if (under)
30+
threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
31+
32+
ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
33+
&status);
34+
if (ret > 0)
35+
return -EIO;
36+
if (ret < 0)
37+
return ret;
38+
*temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK);
39+
40+
return 0;
41+
}
42+
43+
static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
44+
long temp)
45+
{
46+
unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
47+
int ret;
48+
49+
temp = MILLICELSIUS_TO_KELVIN(temp);
50+
threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK);
51+
52+
if (under)
53+
threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
54+
55+
ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
56+
NULL);
57+
if (ret > 0)
58+
return -EIO;
59+
60+
return ret;
61+
}
62+
63+
static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
64+
{
65+
int ret;
66+
67+
ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
68+
&data->log, sizeof(data->log), 0);
69+
70+
return ret <= 0 ? ret : -EIO;
71+
}
72+
73+
static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
74+
u32 attr, int channel, long *val)
75+
{
76+
struct nvme_hwmon_data *data = dev_get_drvdata(dev);
77+
struct nvme_smart_log *log = &data->log;
78+
int temp;
79+
int err;
80+
81+
/*
82+
* First handle attributes which don't require us to read
83+
* the smart log.
84+
*/
85+
switch (attr) {
86+
case hwmon_temp_max:
87+
return nvme_get_temp_thresh(data->ctrl, channel, false, val);
88+
case hwmon_temp_min:
89+
return nvme_get_temp_thresh(data->ctrl, channel, true, val);
90+
case hwmon_temp_crit:
91+
*val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp);
92+
return 0;
93+
default:
94+
break;
95+
}
96+
97+
mutex_lock(&data->read_lock);
98+
err = nvme_hwmon_get_smart_log(data);
99+
if (err)
100+
goto unlock;
101+
102+
switch (attr) {
103+
case hwmon_temp_input:
104+
if (!channel)
105+
temp = get_unaligned_le16(log->temperature);
106+
else
107+
temp = le16_to_cpu(log->temp_sensor[channel - 1]);
108+
*val = KELVIN_TO_MILLICELSIUS(temp);
109+
break;
110+
case hwmon_temp_alarm:
111+
*val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
112+
break;
113+
default:
114+
err = -EOPNOTSUPP;
115+
break;
116+
}
117+
unlock:
118+
mutex_unlock(&data->read_lock);
119+
return err;
120+
}
121+
122+
static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
123+
u32 attr, int channel, long val)
124+
{
125+
struct nvme_hwmon_data *data = dev_get_drvdata(dev);
126+
127+
switch (attr) {
128+
case hwmon_temp_max:
129+
return nvme_set_temp_thresh(data->ctrl, channel, false, val);
130+
case hwmon_temp_min:
131+
return nvme_set_temp_thresh(data->ctrl, channel, true, val);
132+
default:
133+
break;
134+
}
135+
136+
return -EOPNOTSUPP;
137+
}
138+
139+
static const char * const nvme_hwmon_sensor_names[] = {
140+
"Composite",
141+
"Sensor 1",
142+
"Sensor 2",
143+
"Sensor 3",
144+
"Sensor 4",
145+
"Sensor 5",
146+
"Sensor 6",
147+
"Sensor 7",
148+
"Sensor 8",
149+
};
150+
151+
static int nvme_hwmon_read_string(struct device *dev,
152+
enum hwmon_sensor_types type, u32 attr,
153+
int channel, const char **str)
154+
{
155+
*str = nvme_hwmon_sensor_names[channel];
156+
return 0;
157+
}
158+
159+
static umode_t nvme_hwmon_is_visible(const void *_data,
160+
enum hwmon_sensor_types type,
161+
u32 attr, int channel)
162+
{
163+
const struct nvme_hwmon_data *data = _data;
164+
165+
switch (attr) {
166+
case hwmon_temp_crit:
167+
if (!channel && data->ctrl->cctemp)
168+
return 0444;
169+
break;
170+
case hwmon_temp_max:
171+
case hwmon_temp_min:
172+
if ((!channel && data->ctrl->wctemp) ||
173+
(channel && data->log.temp_sensor[channel - 1])) {
174+
if (data->ctrl->quirks &
175+
NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
176+
return 0444;
177+
return 0644;
178+
}
179+
break;
180+
case hwmon_temp_alarm:
181+
if (!channel)
182+
return 0444;
183+
break;
184+
case hwmon_temp_input:
185+
case hwmon_temp_label:
186+
if (!channel || data->log.temp_sensor[channel - 1])
187+
return 0444;
188+
break;
189+
default:
190+
break;
191+
}
192+
return 0;
193+
}
194+
195+
static const struct hwmon_channel_info *nvme_hwmon_info[] = {
196+
HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
197+
HWMON_CHANNEL_INFO(temp,
198+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
199+
HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM,
200+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
201+
HWMON_T_LABEL,
202+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
203+
HWMON_T_LABEL,
204+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
205+
HWMON_T_LABEL,
206+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
207+
HWMON_T_LABEL,
208+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
209+
HWMON_T_LABEL,
210+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
211+
HWMON_T_LABEL,
212+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
213+
HWMON_T_LABEL,
214+
HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
215+
HWMON_T_LABEL),
216+
NULL
217+
};
218+
219+
static const struct hwmon_ops nvme_hwmon_ops = {
220+
.is_visible = nvme_hwmon_is_visible,
221+
.read = nvme_hwmon_read,
222+
.read_string = nvme_hwmon_read_string,
223+
.write = nvme_hwmon_write,
224+
};
225+
226+
static const struct hwmon_chip_info nvme_hwmon_chip_info = {
227+
.ops = &nvme_hwmon_ops,
228+
.info = nvme_hwmon_info,
229+
};
230+
231+
void nvme_hwmon_init(struct nvme_ctrl *ctrl)
232+
{
233+
struct device *dev = ctrl->dev;
234+
struct nvme_hwmon_data *data;
235+
struct device *hwmon;
236+
int err;
237+
238+
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
239+
if (!data)
240+
return;
241+
242+
data->ctrl = ctrl;
243+
mutex_init(&data->read_lock);
244+
245+
err = nvme_hwmon_get_smart_log(data);
246+
if (err) {
247+
dev_warn(dev, "Failed to read smart log (error %d)\n", err);
248+
devm_kfree(dev, data);
249+
return;
250+
}
251+
252+
hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
253+
&nvme_hwmon_chip_info,
254+
NULL);
255+
if (IS_ERR(hwmon)) {
256+
dev_warn(dev, "Failed to instantiate hwmon device\n");
257+
devm_kfree(dev, data);
258+
}
259+
}

drivers/nvme/host/nvme.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ enum nvme_quirks {
114114
* Prevent tag overlap between queues
115115
*/
116116
NVME_QUIRK_SHARED_TAGS = (1 << 13),
117+
118+
/*
119+
* Don't change the value of the temperature threshold feature
120+
*/
121+
NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14),
117122
};
118123

119124
/*
@@ -230,6 +235,8 @@ struct nvme_ctrl {
230235
u16 kas;
231236
u8 npss;
232237
u8 apsta;
238+
u16 wctemp;
239+
u16 cctemp;
233240
u32 oaes;
234241
u32 aen_result;
235242
u32 ctratt;
@@ -665,4 +672,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
665672
return dev_to_disk(dev)->private_data;
666673
}
667674

675+
#ifdef CONFIG_NVME_HWMON
676+
void nvme_hwmon_init(struct nvme_ctrl *ctrl);
677+
#else
678+
static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
679+
#endif
680+
668681
#endif /* _NVME_H */

drivers/nvme/host/pci.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3065,7 +3065,8 @@ static const struct pci_device_id nvme_id_table[] = {
30653065
NVME_QUIRK_DEALLOCATE_ZEROES, },
30663066
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
30673067
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
3068-
NVME_QUIRK_MEDIUM_PRIO_SQ },
3068+
NVME_QUIRK_MEDIUM_PRIO_SQ |
3069+
NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
30693070
{ PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
30703071
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
30713072
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */

include/linux/nvme.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,12 @@ struct nvme_write_zeroes_cmd {
804804

805805
/* Features */
806806

807+
enum {
808+
NVME_TEMP_THRESH_MASK = 0xffff,
809+
NVME_TEMP_THRESH_SELECT_SHIFT = 16,
810+
NVME_TEMP_THRESH_TYPE_UNDER = 0x100000,
811+
};
812+
807813
struct nvme_feat_auto_pst {
808814
__le64 entries[32];
809815
};

0 commit comments

Comments
 (0)