@@ -141,7 +141,10 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
 			continue;
 
-		distance = node_distance(node, ns->ctrl->numa_node);
+		if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
+			distance = node_distance(node, ns->ctrl->numa_node);
+		else
+			distance = LOCAL_DISTANCE;
 
 		switch (ns->ana_state) {
 		case NVME_ANA_OPTIMIZED:
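
This hunk gates the NUMA-distance heuristic on the subsystem iopolicy: under NVME_IOPOLICY_NUMA the path closest to the submitting node still wins, while any other policy pins every candidate to LOCAL_DISTANCE so that only controller liveness and ANA state break ties. A minimal userspace sketch of that collapse (the policy enum and the node_distance() stub below are illustrative stand-ins, not the kernel's):

    #include <stdio.h>

    #define LOCAL_DISTANCE 10    /* a node's distance to itself */

    enum iopolicy { IOPOLICY_NUMA, IOPOLICY_RR };

    static int node_distance(int a, int b)
    {
        return a == b ? LOCAL_DISTANCE : 20;    /* stub: local vs. remote */
    }

    int main(void)
    {
        int ctrl_node[] = { 0, 1 };    /* two paths, one controller per socket */
        enum iopolicy pol = IOPOLICY_RR;

        for (int i = 0; i < 2; i++) {
            int distance = (pol == IOPOLICY_NUMA) ?
                node_distance(0, ctrl_node[i]) : LOCAL_DISTANCE;
            printf("path %d: distance %d\n", i, distance);
        }
        return 0;
    }

With IOPOLICY_RR both paths report distance 10, so the switch on ana_state that follows decides on its own.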
@@ -168,6 +171,47 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
 	return found;
 }
 
+static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
+		struct nvme_ns *ns)
+{
+	ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns,
+			siblings);
+	if (ns)
+		return ns;
+	return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
+}
+
+static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
+		int node, struct nvme_ns *old)
+{
+	struct nvme_ns *ns, *found, *fallback = NULL;
+
+	if (list_is_singular(&head->list))
+		return old;
+
+	for (ns = nvme_next_ns(head, old);
+	     ns != old;
+	     ns = nvme_next_ns(head, ns)) {
+		if (ns->ctrl->state != NVME_CTRL_LIVE ||
+		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+			continue;
+
+		if (ns->ana_state == NVME_ANA_OPTIMIZED) {
+			found = ns;
+			goto out;
+		}
+		if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
+			fallback = ns;
+	}
+
+	if (!fallback)
+		return NULL;
+	found = fallback;
+out:
+	rcu_assign_pointer(head->current_path[node], found);
+	return found;
+}
+
 static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
 {
 	return ns->ctrl->state == NVME_CTRL_LIVE &&
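
nvme_next_ns() turns the RCU sibling list into a ring, and nvme_round_robin_path() walks that ring starting one past the previously used path: the first optimized path wins, the most recently seen non-optimized path is kept as a fallback, and the winner is cached in head->current_path[node] for the next I/O. The same scan over a plain array, as a hedged userspace sketch (the RCU list, the single-path shortcut, and the path caching are deliberately omitted):

    #include <stdio.h>

    enum ana_state { OPTIMIZED, NONOPTIMIZED, INACCESSIBLE };

    static int next(int i, int n)
    {
        return (i + 1) % n;    /* wrap around, like nvme_next_ns() */
    }

    static int round_robin(const enum ana_state *state, int n, int old)
    {
        int fallback = -1;

        for (int i = next(old, n); i != old; i = next(i, n)) {
            if (state[i] == OPTIMIZED)
                return i;        /* first optimized path wins */
            if (state[i] == NONOPTIMIZED)
                fallback = i;    /* last non-optimized path seen */
        }
        return fallback;         /* -1 if no usable path at all */
    }

    int main(void)
    {
        enum ana_state paths[] = { OPTIMIZED, INACCESSIBLE, OPTIMIZED };

        /* previously used path 0: the scan starts at 1, skips the
         * inaccessible path, and picks 2 */
        printf("next path after 0: %d\n", round_robin(paths, 3, 0));
        return 0;
    }

Starting the scan at old + 1 rather than at old is what spreads consecutive I/Os across paths.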
@@ -180,6 +224,8 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 	struct nvme_ns *ns;
 
 	ns = srcu_dereference(head->current_path[node], &head->srcu);
+	if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR && ns)
+		ns = nvme_round_robin_path(head, node, ns);
 	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
 		ns = __nvme_find_path(head, node);
 	return ns;
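
In the submission fast path the cached path is rotated first when round-robin is active, and __nvme_find_path() still covers the cold and degraded cases. The READ_ONCE() here pairs with the WRITE_ONCE() in the sysfs store handler below: the policy is a plain int, so no lock is taken, and the worst a racing change can do is route one I/O under the old policy. A rough C11 analogue of that lockless pairing (an assumption about the intent, not the kernel macros themselves):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic int iopolicy;    /* 0 = numa, 1 = round-robin */

    static void store_policy(int p)
    {
        /* sysfs write side: the kernel uses WRITE_ONCE() */
        atomic_store_explicit(&iopolicy, p, memory_order_relaxed);
    }

    static int load_policy(void)
    {
        /* per-I/O read side: the kernel uses READ_ONCE() */
        return atomic_load_explicit(&iopolicy, memory_order_relaxed);
    }

    int main(void)
    {
        store_policy(1);
        printf("policy now %d\n", load_policy());
        return 0;
    }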
@@ -471,6 +517,44 @@ void nvme_mpath_stop(struct nvme_ctrl *ctrl)
 	cancel_work_sync(&ctrl->ana_work);
 }
 
+#define SUBSYS_ATTR_RW(_name, _mode, _show, _store)  \
+	struct device_attribute subsys_attr_##_name =	\
+		__ATTR(_name, _mode, _show, _store)
+
+static const char *nvme_iopolicy_names[] = {
+	[NVME_IOPOLICY_NUMA]	= "numa",
+	[NVME_IOPOLICY_RR]	= "round-robin",
+};
+
+static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+
+	return sprintf(buf, "%s\n",
+			nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
+}
+
+static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
+		if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
+			WRITE_ONCE(subsys->iopolicy, i);
+			return count;
+		}
+	}
+
+	return -EINVAL;
+}
+SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
+		      nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
+
 static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
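
The new attribute accepts exactly the strings in nvme_iopolicy_names, with sysfs_streq() forgiving the trailing newline that echo appends. Assuming the subsystem device lands in its usual place under the nvme-subsystem class, the policy could be flipped from userspace like this (the nvme-subsys0 path below is a guess for illustration and may differ per system):

    #include <stdio.h>

    int main(void)
    {
        /* adjust nvme-subsys0 to the subsystem on your machine */
        const char *path = "/sys/class/nvme-subsystem/nvme-subsys0/iopolicy";
        FILE *f = fopen(path, "w");

        if (!f) {
            perror(path);
            return 1;
        }
        /* sysfs_streq() in the store handler ignores the trailing newline */
        fputs("round-robin\n", f);
        return fclose(f) ? 1 : 0;
    }

Reading the same file back goes through nvme_subsys_iopolicy_show() and reports the currently active policy name.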