@@ -65,6 +65,7 @@ struct rds_ib_mr {
  * Our own little FMR pool
  */
 struct rds_ib_mr_pool {
+	unsigned int		pool_type;
 	struct mutex		flush_lock;		/* serialize fmr invalidate */
 	struct delayed_work	flush_worker;		/* flush worker */
 
@@ -234,43 +235,47 @@ void rds_ib_destroy_nodev_conns(void)
 		rds_conn_destroy(ic->conn);
 }
 
-struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
+struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
+					     int pool_type)
 {
 	struct rds_ib_mr_pool *pool;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
+	pool->pool_type = pool_type;
 	init_llist_head(&pool->free_list);
 	init_llist_head(&pool->drop_list);
 	init_llist_head(&pool->clean_list);
 	mutex_init(&pool->flush_lock);
 	init_waitqueue_head(&pool->flush_wait);
 	INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
 
-	pool->fmr_attr.max_pages = fmr_message_size;
+	if (pool_type == RDS_IB_MR_1M_POOL) {
+		/* +1 allows for unaligned MRs */
+		pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1;
+		pool->max_items = RDS_FMR_1M_POOL_SIZE;
+	} else {
+		/* pool_type == RDS_IB_MR_8K_POOL */
+		pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1;
+		pool->max_items = RDS_FMR_8K_POOL_SIZE;
+	}
+
+	pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
 	pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
 	pool->fmr_attr.page_shift = PAGE_SHIFT;
-	pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
-
-	/* We never allow more than max_items MRs to be allocated.
-	 * When we exceed more than max_items_soft, we start freeing
-	 * items more aggressively.
-	 * Make sure that max_items > max_items_soft > max_items / 2
-	 */
 	pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
-	pool->max_items = rds_ibdev->max_fmrs;
 
 	return pool;
 }
 
 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
 {
-	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+	struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
 
-	iinfo->rdma_mr_max = pool->max_items;
-	iinfo->rdma_mr_size = pool->fmr_attr.max_pages;
+	iinfo->rdma_mr_max = pool_1m->max_items;
+	iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
 }
 
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
@@ -312,15 +317,29 @@ static inline void wait_clean_list_grace(void)
 	}
 }
 
-static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
+static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev,
+					  int npages)
 {
-	struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+	struct rds_ib_mr_pool *pool;
 	struct rds_ib_mr *ibmr = NULL;
 	int err = 0, iter = 0;
 
+	if (npages <= RDS_FMR_8K_MSG_SIZE)
+		pool = rds_ibdev->mr_8k_pool;
+	else
+		pool = rds_ibdev->mr_1m_pool;
+
 	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
 		queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
 
+	/* Switch pools if one of the pool is reaching upper limit */
+	if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+		if (pool->pool_type == RDS_IB_MR_8K_POOL)
+			pool = rds_ibdev->mr_1m_pool;
+		else
+			pool = rds_ibdev->mr_8k_pool;
+	}
+
 	while (1) {
 		ibmr = rds_ib_reuse_fmr(pool);
 		if (ibmr)
@@ -341,12 +360,18 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 		atomic_dec(&pool->item_count);
 
 		if (++iter > 2) {
-			rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted);
+			if (pool->pool_type == RDS_IB_MR_8K_POOL)
+				rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
+			else
+				rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
 			return ERR_PTR(-EAGAIN);
 		}
 
 		/* We do have some empty MRs. Flush them out. */
-		rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
+		if (pool->pool_type == RDS_IB_MR_8K_POOL)
+			rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
+		else
+			rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
 		rds_ib_flush_mr_pool(pool, 0, &ibmr);
 		if (ibmr)
 			return ibmr;
@@ -371,7 +396,12 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
 		goto out_no_cigar;
 	}
 
-	rds_ib_stats_inc(s_ib_rdma_mr_alloc);
+	ibmr->pool = pool;
+	if (pool->pool_type == RDS_IB_MR_8K_POOL)
+		rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
+	else
+		rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
+
 	return ibmr;
 
 out_no_cigar:
@@ -427,7 +457,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
 	}
 
 	page_cnt += len >> PAGE_SHIFT;
-	if (page_cnt > fmr_message_size)
+	if (page_cnt > ibmr->pool->fmr_attr.max_pages)
 		return -EINVAL;
 
 	dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
@@ -459,7 +489,10 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
 	ibmr->sg_dma_len = sg_dma_len;
 	ibmr->remap_count++;
 
-	rds_ib_stats_inc(s_ib_rdma_mr_used);
+	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
+		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
+	else
+		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
 	ret = 0;
 
 out:
@@ -591,7 +624,7 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
  * to free as many MRs as needed to get back to this limit.
  */
 static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
-			        int free_all, struct rds_ib_mr **ibmr_ret)
+				int free_all, struct rds_ib_mr **ibmr_ret)
 {
 	struct rds_ib_mr *ibmr, *next;
 	struct llist_node *clean_nodes;
@@ -602,11 +635,14 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
 	int ret = 0;
 
-	rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
+	if (pool->pool_type == RDS_IB_MR_8K_POOL)
+		rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
+	else
+		rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
 
 	if (ibmr_ret) {
 		DEFINE_WAIT(wait);
-		while(!mutex_trylock(&pool->flush_lock)) {
+		while (!mutex_trylock(&pool->flush_lock)) {
 			ibmr = rds_ib_reuse_fmr(pool);
 			if (ibmr) {
 				*ibmr_ret = ibmr;
@@ -663,8 +699,12 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
 		unpinned += ibmr->sg_len;
 		__rds_ib_teardown_mr(ibmr);
-		if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
-			rds_ib_stats_inc(s_ib_rdma_mr_free);
+		if (nfreed < free_goal ||
+		    ibmr->remap_count >= pool->fmr_attr.max_maps) {
+			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
+				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
+			else
+				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
 			list_del(&ibmr->unmap_list);
 			ib_dealloc_fmr(ibmr->fmr);
 			kfree(ibmr);
@@ -756,10 +796,11 @@ void rds_ib_flush_mrs(void)
 
 	down_read(&rds_ib_devices_lock);
 	list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
-		struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+		if (rds_ibdev->mr_8k_pool)
+			rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
 
-		if (pool)
-			rds_ib_flush_mr_pool(pool, 0, NULL);
+		if (rds_ibdev->mr_1m_pool)
+			rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
 	}
 	up_read(&rds_ib_devices_lock);
 }
@@ -777,12 +818,12 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 		goto out;
 	}
 
-	if (!rds_ibdev->mr_pool) {
+	if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	ibmr = rds_ib_alloc_fmr(rds_ibdev);
+	ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
 	if (IS_ERR(ibmr)) {
 		rds_ib_dev_put(rds_ibdev);
 		return ibmr;
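
For context, a minimal caller sketch of the new two-argument rds_ib_create_mr_pool(); this is not part of the hunks above, and the surrounding device-setup path and the put_dev error label are assumptions about the companion change, not code shown in this diff.

	/* Sketch only: create one pool per message-size class during
	 * per-device setup; either allocation failing is treated as fatal.
	 * The setup function and the put_dev label are assumed here.
	 */
	rds_ibdev->mr_1m_pool =
		rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
	if (IS_ERR(rds_ibdev->mr_1m_pool)) {
		rds_ibdev->mr_1m_pool = NULL;
		goto put_dev;
	}

	rds_ibdev->mr_8k_pool =
		rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
	if (IS_ERR(rds_ibdev->mr_8k_pool)) {
		rds_ibdev->mr_8k_pool = NULL;
		goto put_dev;
	}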