Skip to content

Commit b740306

Browse files
committed
Merge branch 'nvme-5.3' of git://git.infradead.org/nvme into for-linus
Pull NVMe fixes from Christoph:

"Lot of fixes all over the place, and two very minor features that were in the nvme tree by the end of the merge window, but hadn't made it out to Jens yet."

* 'nvme-5.3' of git://git.infradead.org/nvme:
  nvme: fix regression upon hot device removal and insertion
  nvme-fc: fix module unloads while lports still pending
  nvme-tcp: don't use sendpage for SLAB pages
  nvme-tcp: set the STABLE_WRITES flag when data digests are enabled
  nvmet: print a hint while rejecting NSID 0 or 0xffffffff
  nvme-multipath: do not select namespaces which are about to be removed
  nvme-multipath: also check for a disabled path if there is a single sibling
  nvme-multipath: factor out a nvme_path_is_disabled helper
  nvme: set physical block size and optimal I/O size
  nvme: add I/O characteristics fields
  nvmet: export I/O characteristics attributes in Identify
  nvme-trace: add delete completion and submission queue to admin cmds tracer
  nvme-trace: fix spelling mistake "spcecific" -> "specific"
  nvme-pci: limit max_hw_sectors based on the DMA max mapping size
  nvme-pci: check for NULL return from pci_alloc_p2pmem()
  nvme-pci: don't create a read hctx mapping without read queues
  nvme-pci: don't fall back to a 32-bit DMA mask
  nvme-pci: make nvme_dev_pm_ops static
  nvme-fcloop: resolve warnings on RCU usage and sleep warnings
  nvme-fcloop: fix inconsistent lock state warnings
2 parents 4ddeaae + 420dc73 commit b740306

File tree

14 files changed

+237
-51
lines changed

14 files changed

+237
-51
lines changed

drivers/nvme/host/core.c

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/hdreg.h>
1212
#include <linux/kernel.h>
1313
#include <linux/module.h>
14+
#include <linux/backing-dev.h>
1415
#include <linux/list_sort.h>
1516
#include <linux/slab.h>
1617
#include <linux/types.h>
@@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
16261627
{
16271628
sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
16281629
unsigned short bs = 1 << ns->lba_shift;
1630+
u32 atomic_bs, phys_bs, io_opt;
16291631

16301632
if (ns->lba_shift > PAGE_SHIFT) {
16311633
/* unsupported block size, set capacity to 0 later */
@@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
16341636
blk_mq_freeze_queue(disk->queue);
16351637
blk_integrity_unregister(disk);
16361638

1639+
if (id->nabo == 0) {
1640+
/*
1641+
* Bit 1 indicates whether NAWUPF is defined for this namespace
1642+
* and whether it should be used instead of AWUPF. If NAWUPF ==
1643+
* 0 then AWUPF must be used instead.
1644+
*/
1645+
if (id->nsfeat & (1 << 1) && id->nawupf)
1646+
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
1647+
else
1648+
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
1649+
} else {
1650+
atomic_bs = bs;
1651+
}
1652+
phys_bs = bs;
1653+
io_opt = bs;
1654+
if (id->nsfeat & (1 << 4)) {
1655+
/* NPWG = Namespace Preferred Write Granularity */
1656+
phys_bs *= 1 + le16_to_cpu(id->npwg);
1657+
/* NOWS = Namespace Optimal Write Size */
1658+
io_opt *= 1 + le16_to_cpu(id->nows);
1659+
}
1660+
16371661
blk_queue_logical_block_size(disk->queue, bs);
1638-
blk_queue_physical_block_size(disk->queue, bs);
1639-
blk_queue_io_min(disk->queue, bs);
1662+
/*
1663+
* Linux filesystems assume writing a single physical block is
1664+
* an atomic operation. Hence limit the physical block size to the
1665+
* value of the Atomic Write Unit Power Fail parameter.
1666+
*/
1667+
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
1668+
blk_queue_io_min(disk->queue, phys_bs);
1669+
blk_queue_io_opt(disk->queue, io_opt);
16401670

16411671
if (ns->ms && !ns->ext &&
16421672
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
@@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
23862416
lockdep_assert_held(&nvme_subsystems_lock);
23872417

23882418
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
2389-
if (ctrl->state == NVME_CTRL_DELETING ||
2390-
ctrl->state == NVME_CTRL_DEAD)
2419+
if (tmp->state == NVME_CTRL_DELETING ||
2420+
tmp->state == NVME_CTRL_DEAD)
23912421
continue;
23922422

23932423
if (tmp->cntlid == ctrl->cntlid) {
@@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
24332463
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
24342464
subsys->vendor_id = le16_to_cpu(id->vid);
24352465
subsys->cmic = id->cmic;
2466+
subsys->awupf = le16_to_cpu(id->awupf);
24362467
#ifdef CONFIG_NVME_MULTIPATH
24372468
subsys->iopolicy = NVME_IOPOLICY_NUMA;
24382469
#endif
@@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
32743305
goto out_free_ns;
32753306
}
32763307

3308+
if (ctrl->opts->data_digest)
3309+
ns->queue->backing_dev_info->capabilities
3310+
|= BDI_CAP_STABLE_WRITES;
3311+
32773312
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
32783313
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
32793314
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);

drivers/nvme/host/fc.c

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
204204

205205
static struct workqueue_struct *nvme_fc_wq;
206206

207+
static bool nvme_fc_waiting_to_unload;
208+
static DECLARE_COMPLETION(nvme_fc_unload_proceed);
209+
207210
/*
208211
* These items are short-term. They will eventually be moved into
209212
* a generic FC class. See comments in module init.
@@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
229232
/* remove from transport list */
230233
spin_lock_irqsave(&nvme_fc_lock, flags);
231234
list_del(&lport->port_list);
235+
if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
236+
complete(&nvme_fc_unload_proceed);
232237
spin_unlock_irqrestore(&nvme_fc_lock, flags);
233238

234239
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
@@ -3456,11 +3461,51 @@ static int __init nvme_fc_init_module(void)
34563461
return ret;
34573462
}
34583463

3464+
static void
3465+
nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
3466+
{
3467+
struct nvme_fc_ctrl *ctrl;
3468+
3469+
spin_lock(&rport->lock);
3470+
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
3471+
dev_warn(ctrl->ctrl.device,
3472+
"NVME-FC{%d}: transport unloading: deleting ctrl\n",
3473+
ctrl->cnum);
3474+
nvme_delete_ctrl(&ctrl->ctrl);
3475+
}
3476+
spin_unlock(&rport->lock);
3477+
}
3478+
3479+
static void
3480+
nvme_fc_cleanup_for_unload(void)
3481+
{
3482+
struct nvme_fc_lport *lport;
3483+
struct nvme_fc_rport *rport;
3484+
3485+
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
3486+
list_for_each_entry(rport, &lport->endp_list, endp_list) {
3487+
nvme_fc_delete_controllers(rport);
3488+
}
3489+
}
3490+
}
3491+
34593492
static void __exit nvme_fc_exit_module(void)
34603493
{
3461-
/* sanity check - all lports should be removed */
3462-
if (!list_empty(&nvme_fc_lport_list))
3463-
pr_warn("%s: localport list not empty\n", __func__);
3494+
unsigned long flags;
3495+
bool need_cleanup = false;
3496+
3497+
spin_lock_irqsave(&nvme_fc_lock, flags);
3498+
nvme_fc_waiting_to_unload = true;
3499+
if (!list_empty(&nvme_fc_lport_list)) {
3500+
need_cleanup = true;
3501+
nvme_fc_cleanup_for_unload();
3502+
}
3503+
spin_unlock_irqrestore(&nvme_fc_lock, flags);
3504+
if (need_cleanup) {
3505+
pr_info("%s: waiting for ctlr deletes\n", __func__);
3506+
wait_for_completion(&nvme_fc_unload_proceed);
3507+
pr_info("%s: ctrl deletes complete\n", __func__);
3508+
}
34643509

34653510
nvmf_unregister_transport(&nvme_fc_transport);
34663511

drivers/nvme/host/multipath.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns)
123123
}
124124
}
125125

126+
static bool nvme_path_is_disabled(struct nvme_ns *ns)
127+
{
128+
return ns->ctrl->state != NVME_CTRL_LIVE ||
129+
test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
130+
test_bit(NVME_NS_REMOVING, &ns->flags);
131+
}
132+
126133
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
127134
{
128135
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
129136
struct nvme_ns *found = NULL, *fallback = NULL, *ns;
130137

131138
list_for_each_entry_rcu(ns, &head->list, siblings) {
132-
if (ns->ctrl->state != NVME_CTRL_LIVE ||
133-
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
139+
if (nvme_path_is_disabled(ns))
134140
continue;
135141

136142
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
@@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
178184
{
179185
struct nvme_ns *ns, *found, *fallback = NULL;
180186

181-
if (list_is_singular(&head->list))
187+
if (list_is_singular(&head->list)) {
188+
if (nvme_path_is_disabled(old))
189+
return NULL;
182190
return old;
191+
}
183192

184193
for (ns = nvme_next_ns(head, old);
185194
ns != old;
186195
ns = nvme_next_ns(head, ns)) {
187-
if (ns->ctrl->state != NVME_CTRL_LIVE ||
188-
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
196+
if (nvme_path_is_disabled(ns))
189197
continue;
190198

191199
if (ns->ana_state == NVME_ANA_OPTIMIZED) {

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ struct nvme_subsystem {
283283
char firmware_rev[8];
284284
u8 cmic;
285285
u16 vendor_id;
286+
u16 awupf; /* 0's based awupf value. */
286287
struct ida ns_ida;
287288
#ifdef CONFIG_NVME_MULTIPATH
288289
enum nvme_iopolicy iopolicy;

drivers/nvme/host/pci.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
14391439

14401440
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
14411441
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
1442-
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
1443-
nvmeq->sq_cmds);
1444-
if (nvmeq->sq_dma_addr) {
1445-
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
1446-
return 0;
1442+
if (nvmeq->sq_cmds) {
1443+
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
1444+
nvmeq->sq_cmds);
1445+
if (nvmeq->sq_dma_addr) {
1446+
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
1447+
return 0;
1448+
}
1449+
1450+
pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
14471451
}
14481452
}
14491453

@@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
22502254
if (!dev->ctrl.tagset) {
22512255
dev->tagset.ops = &nvme_mq_ops;
22522256
dev->tagset.nr_hw_queues = dev->online_queues - 1;
2253-
dev->tagset.nr_maps = 2; /* default + read */
2257+
dev->tagset.nr_maps = 1; /* default */
2258+
if (dev->io_queues[HCTX_TYPE_READ])
2259+
dev->tagset.nr_maps++;
22542260
if (dev->io_queues[HCTX_TYPE_POLL])
22552261
dev->tagset.nr_maps++;
22562262
dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
22892295

22902296
pci_set_master(pdev);
22912297

2292-
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
2293-
dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
2298+
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
22942299
goto disable;
22952300

22962301
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
@@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work)
24982503
* Limit the max command size to prevent iod->sg allocations going
24992504
* over a single page.
25002505
*/
2501-
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
2506+
dev->ctrl.max_hw_sectors = min_t(u32,
2507+
NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
25022508
dev->ctrl.max_segments = NVME_MAX_SEGS;
25032509

25042510
/*
@@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev)
29232929
return 0;
29242930
}
29252931

2926-
const struct dev_pm_ops nvme_dev_pm_ops = {
2932+
static const struct dev_pm_ops nvme_dev_pm_ops = {
29272933
.suspend = nvme_suspend,
29282934
.resume = nvme_resume,
29292935
.freeze = nvme_simple_suspend,

drivers/nvme/host/tcp.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
860860
else
861861
flags |= MSG_MORE;
862862

863-
ret = kernel_sendpage(queue->sock, page, offset, len, flags);
863+
/* can't zcopy slab pages */
864+
if (unlikely(PageSlab(page))) {
865+
ret = sock_no_sendpage(queue->sock, page, offset, len,
866+
flags);
867+
} else {
868+
ret = kernel_sendpage(queue->sock, page, offset, len,
869+
flags);
870+
}
864871
if (ret <= 0)
865872
return ret;
866873

drivers/nvme/host/trace.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@
77
#include <asm/unaligned.h>
88
#include "trace.h"
99

10+
static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10)
11+
{
12+
const char *ret = trace_seq_buffer_ptr(p);
13+
u16 sqid = get_unaligned_le16(cdw10);
14+
15+
trace_seq_printf(p, "sqid=%u", sqid);
16+
trace_seq_putc(p, 0);
17+
18+
return ret;
19+
}
20+
1021
static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
1122
{
1223
const char *ret = trace_seq_buffer_ptr(p);
@@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
2334
return ret;
2435
}
2536

37+
static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10)
38+
{
39+
const char *ret = trace_seq_buffer_ptr(p);
40+
u16 cqid = get_unaligned_le16(cdw10);
41+
42+
trace_seq_printf(p, "cqid=%u", cqid);
43+
trace_seq_putc(p, 0);
44+
45+
return ret;
46+
}
47+
2648
static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
2749
{
2850
const char *ret = trace_seq_buffer_ptr(p);
@@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
107129
u8 opcode, u8 *cdw10)
108130
{
109131
switch (opcode) {
132+
case nvme_admin_delete_sq:
133+
return nvme_trace_delete_sq(p, cdw10);
110134
case nvme_admin_create_sq:
111135
return nvme_trace_create_sq(p, cdw10);
136+
case nvme_admin_delete_cq:
137+
return nvme_trace_delete_cq(p, cdw10);
112138
case nvme_admin_create_cq:
113139
return nvme_trace_create_cq(p, cdw10);
114140
case nvme_admin_identify:
@@ -178,7 +204,7 @@ static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
178204
{
179205
const char *ret = trace_seq_buffer_ptr(p);
180206

181-
trace_seq_printf(p, "spcecific=%*ph", 24, spc);
207+
trace_seq_printf(p, "specific=%*ph", 24, spc);
182208
trace_seq_putc(p, 0);
183209
return ret;
184210
}

drivers/nvme/target/admin-cmd.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
442442
break;
443443
}
444444

445+
if (ns->bdev)
446+
nvmet_bdev_set_limits(ns->bdev, id);
447+
445448
/*
446449
* We just provide a single LBA format that matches what the
447450
* underlying device reports.

drivers/nvme/target/configfs.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,8 +588,10 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
588588
goto out;
589589

590590
ret = -EINVAL;
591-
if (nsid == 0 || nsid == NVME_NSID_ALL)
591+
if (nsid == 0 || nsid == NVME_NSID_ALL) {
592+
pr_err("invalid nsid %#x", nsid);
592593
goto out;
594+
}
593595

594596
ret = -ENOMEM;
595597
ns = nvmet_ns_alloc(subsys, nsid);

0 commit comments

Comments
 (0)