Skip to content

Commit 75c10e7

Browse files
hreinecke authored and Christoph Hellwig committed
nvme-multipath: round-robin I/O policy
Implement a simple round-robin I/O policy for multipathing. Path selection is done in two rounds, first iterating across all optimized paths, and if that doesn't return any valid paths, iterate over all optimized and non-optimized paths. If no paths are found, use the existing algorithm. Also add a sysfs attribute 'iopolicy' to switch between the current NUMA-aware I/O policy and the 'round-robin' I/O policy. Signed-off-by: Hannes Reinecke <[email protected]> Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 49b1f22 commit 75c10e7

File tree

3 files changed

+100
-1
lines changed

3 files changed

+100
-1
lines changed

drivers/nvme/host/core.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2328,6 +2328,9 @@ static struct attribute *nvme_subsys_attrs[] = {
23282328
&subsys_attr_serial.attr,
23292329
&subsys_attr_firmware_rev.attr,
23302330
&subsys_attr_subsysnqn.attr,
2331+
#ifdef CONFIG_NVME_MULTIPATH
2332+
&subsys_attr_iopolicy.attr,
2333+
#endif
23312334
NULL,
23322335
};
23332336

@@ -2380,6 +2383,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
23802383
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
23812384
subsys->vendor_id = le16_to_cpu(id->vid);
23822385
subsys->cmic = id->cmic;
2386+
#ifdef CONFIG_NVME_MULTIPATH
2387+
subsys->iopolicy = NVME_IOPOLICY_NUMA;
2388+
#endif
23832389

23842390
subsys->dev.class = nvme_subsys_class;
23852391
subsys->dev.release = nvme_release_subsystem;

drivers/nvme/host/multipath.c

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
141141
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
142142
continue;
143143

144-
distance = node_distance(node, ns->ctrl->numa_node);
144+
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
145+
distance = node_distance(node, ns->ctrl->numa_node);
146+
else
147+
distance = LOCAL_DISTANCE;
145148

146149
switch (ns->ana_state) {
147150
case NVME_ANA_OPTIMIZED:
@@ -168,6 +171,47 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
168171
return found;
169172
}
170173

174+
static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
175+
struct nvme_ns *ns)
176+
{
177+
ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns,
178+
siblings);
179+
if (ns)
180+
return ns;
181+
return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
182+
}
183+
184+
static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
185+
int node, struct nvme_ns *old)
186+
{
187+
struct nvme_ns *ns, *found, *fallback = NULL;
188+
189+
if (list_is_singular(&head->list))
190+
return old;
191+
192+
for (ns = nvme_next_ns(head, old);
193+
ns != old;
194+
ns = nvme_next_ns(head, ns)) {
195+
if (ns->ctrl->state != NVME_CTRL_LIVE ||
196+
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
197+
continue;
198+
199+
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
200+
found = ns;
201+
goto out;
202+
}
203+
if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
204+
fallback = ns;
205+
}
206+
207+
if (!fallback)
208+
return NULL;
209+
found = fallback;
210+
out:
211+
rcu_assign_pointer(head->current_path[node], found);
212+
return found;
213+
}
214+
171215
static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
172216
{
173217
return ns->ctrl->state == NVME_CTRL_LIVE &&
@@ -180,6 +224,8 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
180224
struct nvme_ns *ns;
181225

182226
ns = srcu_dereference(head->current_path[node], &head->srcu);
227+
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR && ns)
228+
ns = nvme_round_robin_path(head, node, ns);
183229
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
184230
ns = __nvme_find_path(head, node);
185231
return ns;
@@ -471,6 +517,44 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
471517
cancel_work_sync(&ctrl->ana_work);
472518
}
473519

520+
#define SUBSYS_ATTR_RW(_name, _mode, _show, _store) \
521+
struct device_attribute subsys_attr_##_name = \
522+
__ATTR(_name, _mode, _show, _store)
523+
524+
static const char *nvme_iopolicy_names[] = {
525+
[NVME_IOPOLICY_NUMA] = "numa",
526+
[NVME_IOPOLICY_RR] = "round-robin",
527+
};
528+
529+
static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
530+
struct device_attribute *attr, char *buf)
531+
{
532+
struct nvme_subsystem *subsys =
533+
container_of(dev, struct nvme_subsystem, dev);
534+
535+
return sprintf(buf, "%s\n",
536+
nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
537+
}
538+
539+
/*
 * sysfs store callback for the subsystem "iopolicy" attribute.
 *
 * Compares @buf against every known policy name using sysfs_streq() and,
 * on a match, records the matching policy index in the subsystem.
 * Returns @count on success or -EINVAL when @buf names no known policy.
 */
static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);
	int policy;

	for (policy = 0; policy < ARRAY_SIZE(nvme_iopolicy_names); policy++) {
		if (!sysfs_streq(buf, nvme_iopolicy_names[policy]))
			continue;

		WRITE_ONCE(subsys->iopolicy, policy);
		return count;
	}

	return -EINVAL;
}
555+
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
556+
nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
557+
474558
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
475559
char *buf)
476560
{

drivers/nvme/host/nvme.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,11 @@ struct nvme_ctrl {
252252
unsigned long discard_page_busy;
253253
};
254254

255+
/* Multipath I/O policies; the value is stored in nvme_subsystem.iopolicy. */
enum nvme_iopolicy {
	NVME_IOPOLICY_NUMA = 0,	/* exposed as "numa" via sysfs */
	NVME_IOPOLICY_RR,	/* exposed as "round-robin" via sysfs */
};
259+
255260
struct nvme_subsystem {
256261
int instance;
257262
struct device dev;
@@ -271,6 +276,9 @@ struct nvme_subsystem {
271276
u8 cmic;
272277
u16 vendor_id;
273278
struct ida ns_ida;
279+
#ifdef CONFIG_NVME_MULTIPATH
280+
enum nvme_iopolicy iopolicy;
281+
#endif
274282
};
275283

276284
/*
@@ -491,6 +499,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
491499

492500
extern struct device_attribute dev_attr_ana_grpid;
493501
extern struct device_attribute dev_attr_ana_state;
502+
extern struct device_attribute subsys_attr_iopolicy;
494503

495504
#else
496505
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)

0 commit comments

Comments
 (0)