@@ -49,6 +49,7 @@ unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE;
 unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
 bool prefer_frwr;
 unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
+unsigned int rds_ib_cache_gc_interval = RDS_IB_DEFAULT_CACHE_GC_INTERVAL;
 
 module_param(rds_ib_fmr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
@@ -60,6 +61,8 @@ module_param(prefer_frwr, bool, 0444);
 MODULE_PARM_DESC(prefer_frwr, "Preference of FRWR over FMR for memory registration(Y/N)");
 module_param(rds_ib_rnr_retry_count, int, 0444);
 MODULE_PARM_DESC(rds_ib_rnr_retry_count, " QP rnr retry count");
+module_param(rds_ib_cache_gc_interval, int, 0444);
+MODULE_PARM_DESC(rds_ib_cache_gc_interval, " Cache cleanup interval in seconds");
 
 /*
  * we have a clumsy combination of RCU and a rwsem protecting this list
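The new rds_ib_cache_gc_interval parameter above is read-only at load time (mode 0444) and, as the later hunks show, is converted to jiffies to drive a self-rearming delayed work item. As a rough illustration only, not part of this patch, the sketch below shows the same periodic pattern with the stock workqueue API (schedule_delayed_work and friends) instead of the RDS-internal rds_queue_delayed_work()/rds_aux_wq wrapper; every name in it is invented for the example.

/* Illustrative only: generic periodic-GC skeleton; names are invented. */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static unsigned int gc_interval = 60;		/* seconds, example default */
module_param(gc_interval, uint, 0444);
MODULE_PARM_DESC(gc_interval, " Cache cleanup interval in seconds");

static struct delayed_work gc_work;

static void gc_worker(struct work_struct *work)
{
	/* ... inspect and drain idle caches here ... */

	/* Re-arm so the scan runs again gc_interval seconds from now. */
	schedule_delayed_work(&gc_work, msecs_to_jiffies(gc_interval * 1000));
}

static int __init gc_example_init(void)
{
	INIT_DELAYED_WORK(&gc_work, gc_worker);
	schedule_delayed_work(&gc_work, msecs_to_jiffies(gc_interval * 1000));
	return 0;
}

static void __exit gc_example_exit(void)
{
	cancel_delayed_work_sync(&gc_work);
}

module_init(gc_example_init);
module_exit(gc_example_exit);
MODULE_LICENSE("GPL");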
@@ -99,6 +102,8 @@ static struct ib_mr *rds_ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 	return mr;
 }
 
+static void rds_ib_cache_gc_worker(struct work_struct *work);
+
 static int ib_rds_cache_hit_show(struct seq_file *m, void *v)
 {
 	struct rds_ib_device *rds_ibdev = m->private;
@@ -264,6 +269,7 @@ static int rds_ib_alloc_cache(struct rds_ib_refill_cache *cache)
 		atomic_set(&head->count, 0);
 		atomic64_set(&head->hit_count, 0);
 		atomic64_set(&head->miss_count, 0);
+		atomic64_set(&head->gc_count, 0);
 	}
 	lfstack_init(&cache->ready);
 	atomic64_set(&cache->hit_count, 0);
@@ -289,6 +295,7 @@ static void rds_ib_free_cache(struct rds_ib_refill_cache *cache)
 	cache->percpu = NULL;
 	atomic64_set(&cache->hit_count, 0);
 	atomic64_set(&cache->miss_count, 0);
+	atomic64_set(&head->gc_count, 0);
 }
 
 static int rds_ib_alloc_caches(struct rds_ib_device *rds_ibdev)
@@ -309,47 +316,84 @@ static int rds_ib_alloc_caches(struct rds_ib_device *rds_ibdev)
 			goto out;
 		}
 	}
+	INIT_DELAYED_WORK(&rds_ibdev->i_cache_gc_work, rds_ib_cache_gc_worker);
+	rds_ibdev->i_cache_gc_cpu = 0;
+	rds_queue_delayed_work(NULL, rds_aux_wq, &rds_ibdev->i_cache_gc_work,
+			       msecs_to_jiffies(rds_ib_cache_gc_interval * 1000),
+			       "Cache_Garbage_Collection");
 out:
 	return ret;
 }
 
+static inline void rds_ib_free_one_frag(struct rds_page_frag *frag, size_t cache_sz)
+{
+	int cache_frag_pages = ceil(cache_sz, PAGE_SIZE);
+
+	frag->f_cache_entry.next = NULL;
+	WARN_ON(!list_empty(&frag->f_item));
+	rds_ib_recv_free_frag(frag, cache_frag_pages);
+	atomic_sub(cache_frag_pages, &rds_ib_allocation);
+	kmem_cache_free(rds_ib_frag_slab, frag);
+	rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
+	rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
+}
+
+static void rds_ib_free_frag_cache_one(struct rds_ib_refill_cache *cache, size_t cache_sz, int cpu)
+{
+	struct lfstack_el *cache_item;
+	struct rds_page_frag *frag;
+	struct rds_ib_cache_head *head = per_cpu_ptr(cache->percpu, cpu);
+
+	trace_rds_ib_free_cache_one(head, cpu, "frag(s)");
+	while ((cache_item = lfstack_pop(&head->stack))) {
+		atomic_dec(&head->count);
+		frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
+		rds_ib_free_one_frag(frag, cache_sz);
+	}
+}
+
 static void rds_ib_free_frag_cache(struct rds_ib_refill_cache *cache, size_t cache_sz)
 {
-	struct rds_ib_cache_head *head;
 	int cpu;
+	struct rds_ib_cache_head *head;
 	struct lfstack_el *cache_item;
 	struct rds_page_frag *frag;
-	int cache_frag_pages = ceil(cache_sz, PAGE_SIZE);
 
 	for_each_possible_cpu(cpu) {
+		rds_ib_free_frag_cache_one(cache, cache_sz, cpu);
 		head = per_cpu_ptr(cache->percpu, cpu);
-		while ((cache_item = lfstack_pop(&head->stack))) {
-			frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
-			frag->f_cache_entry.next = NULL;
-			WARN_ON(!list_empty(&frag->f_item));
-			rds_ib_recv_free_frag(frag, cache_frag_pages);
-			atomic_sub(cache_frag_pages, &rds_ib_allocation);
-			kmem_cache_free(rds_ib_frag_slab, frag);
-			rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
-			rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
-		}
 		lfstack_free(&head->stack);
 		atomic_set(&head->count, 0);
 	}
 	while ((cache_item = lfstack_pop(&cache->ready))) {
 		frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
-		frag->f_cache_entry.next = NULL;
-		WARN_ON(!list_empty(&frag->f_item));
-		rds_ib_recv_free_frag(frag, cache_frag_pages);
-		atomic_sub(cache_frag_pages, &rds_ib_allocation);
-		kmem_cache_free(rds_ib_frag_slab, frag);
-		rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
-		rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
+		rds_ib_free_one_frag(frag, cache_sz);
 	}
 	lfstack_free(&cache->ready);
 	free_percpu(cache->percpu);
 }
 
+static inline void rds_ib_free_one_inc(struct rds_ib_incoming *inc)
+{
+	inc->ii_cache_entry.next = 0;
+	WARN_ON(!list_empty(&inc->ii_frags));
+	kmem_cache_free(rds_ib_incoming_slab, inc);
+}
+
+static void rds_ib_free_inc_cache_one(struct rds_ib_refill_cache *cache, int cpu)
+{
+	struct lfstack_el *cache_item;
+	struct rds_ib_incoming *inc;
+	struct rds_ib_cache_head *head = per_cpu_ptr(cache->percpu, cpu);
+
+	trace_rds_ib_free_cache_one(head, cpu, "inc(s)");
+	while ((cache_item = lfstack_pop(&head->stack))) {
+		atomic_dec(&head->count);
+		inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
+		rds_ib_free_one_inc(inc);
+	}
+}
+
 static void rds_ib_free_inc_cache(struct rds_ib_refill_cache *cache)
 {
 	struct rds_ib_cache_head *head;
@@ -358,21 +402,14 @@ static void rds_ib_free_inc_cache(struct rds_ib_refill_cache *cache)
 	struct rds_ib_incoming *inc;
 
 	for_each_possible_cpu(cpu) {
+		rds_ib_free_inc_cache_one(cache, cpu);
 		head = per_cpu_ptr(cache->percpu, cpu);
-		while ((cache_item = lfstack_pop(&head->stack))) {
-			inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
-			inc->ii_cache_entry.next = 0;
-			WARN_ON(!list_empty(&inc->ii_frags));
-			kmem_cache_free(rds_ib_incoming_slab, inc);
-		}
 		lfstack_free(&head->stack);
 		atomic_set(&head->count, 0);
 	}
 	while ((cache_item = lfstack_pop(&cache->ready))) {
 		inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
-		inc->ii_cache_entry.next = 0;
-		WARN_ON(!list_empty(&inc->ii_frags));
-		kmem_cache_free(rds_ib_incoming_slab, inc);
+		rds_ib_free_one_inc(inc);
 	}
 	lfstack_free(&cache->ready);
 	free_percpu(cache->percpu);
@@ -382,12 +419,51 @@ static void rds_ib_free_caches(struct rds_ib_device *rds_ibdev)
 {
 	int i;
 
+	cancel_delayed_work(&rds_ibdev->i_cache_gc_work);
 	rds_ib_free_inc_cache(&rds_ibdev->i_cache_incs);
-	for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++) {
-		size_t cache_sz = (1 << i) * PAGE_SIZE;
+	for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++)
+		rds_ib_free_frag_cache(rds_ibdev->i_cache_frags + i, PAGE_SIZE << i);
+}
+
+static bool rds_ib_cache_need_gc(struct rds_ib_refill_cache *cache, int cpu)
+{
+	struct rds_ib_cache_head *head;
+	u64 nmbr;
+	bool ret;
 
-		rds_ib_free_frag_cache(rds_ibdev->i_cache_frags + i, cache_sz);
+	head = per_cpu_ptr(cache->percpu, cpu);
+	nmbr = atomic64_read(&head->miss_count) + atomic64_read(&head->hit_count);
+
+	ret = (atomic64_read(&head->gc_count) == nmbr && atomic_read(&head->count) > 0);
+	atomic64_set(&head->gc_count, nmbr);
+	return ret;
+}
+
+static void rds_ib_cache_gc_worker(struct work_struct *work)
+{
+	int i, j;
+	int nmbr_to_check = num_possible_cpus() / 2;
+	struct rds_ib_device *rds_ibdev = container_of(work,
+						       struct rds_ib_device,
+						       i_cache_gc_work.work);
+
+	for (j = 0; j < nmbr_to_check; j++) {
+		if (rds_ib_cache_need_gc(&rds_ibdev->i_cache_incs, rds_ibdev->i_cache_gc_cpu))
+			rds_ib_free_inc_cache_one(&rds_ibdev->i_cache_incs, rds_ibdev->i_cache_gc_cpu);
+
+		for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++)
+			if (rds_ib_cache_need_gc(rds_ibdev->i_cache_frags + i, rds_ibdev->i_cache_gc_cpu))
+				rds_ib_free_frag_cache_one(rds_ibdev->i_cache_frags + i,
+							   PAGE_SIZE << i,
+							   rds_ibdev->i_cache_gc_cpu);
+
+		if (++rds_ibdev->i_cache_gc_cpu >= num_possible_cpus())
+			rds_ibdev->i_cache_gc_cpu = 0;
 	}
+
+	rds_queue_delayed_work(NULL, rds_aux_wq, &rds_ibdev->i_cache_gc_work,
+			       msecs_to_jiffies(rds_ib_cache_gc_interval * 1000),
+			       "Cache_Garbage_Collection");
 }
 
 /* Reference counter for struct rds_ib_device on the module */
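For readers skimming the GC heuristic: rds_ib_cache_need_gc() treats a per-CPU cache as reclaimable when its combined hit and miss counters have not moved since the previous pass and the cache still holds entries, i.e. it was refilled but then saw no traffic for a whole interval. The standalone sketch below (a simplified userspace illustration using plain integers instead of atomics; none of it is part of the patch) demonstrates that rule.

/* Simplified model of the idle-detection rule in rds_ib_cache_need_gc(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cache_head {
	uint64_t hit_count;	/* allocations served from the cache */
	uint64_t miss_count;	/* allocations that fell through to the slab */
	uint64_t gc_count;	/* hit+miss snapshot taken by the last GC pass */
	int	 count;		/* entries currently parked in the cache */
};

static bool cache_need_gc(struct cache_head *head)
{
	uint64_t nmbr = head->hit_count + head->miss_count;
	bool idle = (head->gc_count == nmbr && head->count > 0);

	head->gc_count = nmbr;	/* remember the activity level for the next pass */
	return idle;
}

int main(void)
{
	struct cache_head head = { .hit_count = 10, .miss_count = 2, .count = 4 };

	printf("%d\n", cache_need_gc(&head));	/* 0: first pass only snapshots */
	printf("%d\n", cache_need_gc(&head));	/* 1: no traffic since last pass */
	head.hit_count++;			/* cache was used again */
	printf("%d\n", cache_need_gc(&head));	/* 0: activity resets the clock */
	return 0;
}

In the driver the same comparison is made on per-CPU atomic64 counters, and each worker run walks num_possible_cpus()/2 CPUs before rescheduling itself, so any given CPU's caches are inspected roughly every two intervals.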