Commit 8450b32

Hans Westgaard Ry authored and jfvogel committed
RDS-IB: Add garbage-collection to cache
Add code to clean up/empty the percpu-cache. A worker thread runs periodically and empties percpu-caches that haven't been accessed since the last time the worker ran.

Orabug: 33426206

Signed-off-by: Hans Westgaard Ry <[email protected]>
Reviewed-by: William Kucharski <[email protected]>

Orabug: 33590097
UEK6 => UEK7

(cherry picked from commit 36dbe68)
cherry-pick-repo=UEK/production/linux-uek.git

Signed-off-by: Gerd Rausch <[email protected]>
Reviewed-by: William Kucharski <[email protected]>
1 parent 8a68403 commit 8450b32
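
The core of the change is the idle-detection heuristic in rds_ib_cache_need_gc() below: each per-cpu cache head counts hits and misses, and every sweep snapshots hits + misses into gc_count; if the sum is unchanged since the previous sweep and the cache still holds entries, the cache has been idle for a whole interval and is drained. The following is a minimal stand-alone sketch of that policy in userspace C — field names and the scenario in main() are illustrative, not kernel code:

/*
 * Userspace model of the GC idle-detection heuristic (assumed names;
 * the kernel version uses atomics on a per-cpu rds_ib_cache_head).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cache_head {
        uint64_t hit_count;     /* lookups served from the cache */
        uint64_t miss_count;    /* lookups that fell through */
        uint64_t gc_count;      /* hits + misses seen at the last sweep */
        int      count;         /* entries currently cached */
};

/* Mirrors rds_ib_cache_need_gc(): idle since last sweep and non-empty. */
static bool cache_need_gc(struct cache_head *head)
{
        uint64_t nmbr = head->hit_count + head->miss_count;
        bool idle = (head->gc_count == nmbr && head->count > 0);

        head->gc_count = nmbr;  /* remember the activity level for next sweep */
        return idle;
}

int main(void)
{
        struct cache_head head = { .hit_count = 10, .miss_count = 2, .count = 4 };

        printf("sweep 1: %s\n", cache_need_gc(&head) ? "drain" : "keep");  /* keep  */
        /* no accesses between sweeps -> hit/miss sum unchanged */
        printf("sweep 2: %s\n", cache_need_gc(&head) ? "drain" : "keep");  /* drain */

        head.hit_count++;       /* a single access resets the idle clock */
        printf("sweep 3: %s\n", cache_need_gc(&head) ? "drain" : "keep");  /* keep  */
        return 0;
}

A cache is therefore never freed on first sight: it takes two consecutive sweeps of the same CPU with zero activity in between before its entries are released.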

4 files changed, +137 -31 lines changed

net/rds/ib.c

Lines changed: 107 additions & 31 deletions
@@ -49,6 +49,7 @@ unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE;
 unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
 bool prefer_frwr;
 unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
+unsigned int rds_ib_cache_gc_interval = RDS_IB_DEFAULT_CACHE_GC_INTERVAL;
 
 module_param(rds_ib_fmr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
@@ -60,6 +61,8 @@ module_param(prefer_frwr, bool, 0444);
 MODULE_PARM_DESC(prefer_frwr, "Preference of FRWR over FMR for memory registration(Y/N)");
 module_param(rds_ib_rnr_retry_count, int, 0444);
 MODULE_PARM_DESC(rds_ib_rnr_retry_count, " QP rnr retry count");
+module_param(rds_ib_cache_gc_interval, int, 0444);
+MODULE_PARM_DESC(rds_ib_cache_gc_interval, " Cache cleanup interval in seconds");
 
 /*
  * we have a clumsy combination of RCU and a rwsem protecting this list
@@ -99,6 +102,8 @@ static struct ib_mr *rds_ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
         return mr;
 }
 
+static void rds_ib_cache_gc_worker(struct work_struct *work);
+
 static int ib_rds_cache_hit_show(struct seq_file *m, void *v)
 {
         struct rds_ib_device *rds_ibdev = m->private;
@@ -264,6 +269,7 @@ static int rds_ib_alloc_cache(struct rds_ib_refill_cache *cache)
                 atomic_set(&head->count, 0);
                 atomic64_set(&head->hit_count, 0);
                 atomic64_set(&head->miss_count, 0);
+                atomic64_set(&head->gc_count, 0);
         }
         lfstack_init(&cache->ready);
         atomic64_set(&cache->hit_count, 0);
@@ -289,6 +295,7 @@ static void rds_ib_free_cache(struct rds_ib_refill_cache *cache)
         cache->percpu = NULL;
         atomic64_set(&cache->hit_count, 0);
         atomic64_set(&cache->miss_count, 0);
+        atomic64_set(&head->gc_count, 0);
 }
 
 static int rds_ib_alloc_caches(struct rds_ib_device *rds_ibdev)
@@ -309,47 +316,84 @@ static int rds_ib_alloc_caches(struct rds_ib_device *rds_ibdev)
                         goto out;
                 }
         }
+        INIT_DELAYED_WORK(&rds_ibdev->i_cache_gc_work, rds_ib_cache_gc_worker);
+        rds_ibdev->i_cache_gc_cpu = 0;
+        rds_queue_delayed_work(NULL, rds_aux_wq, &rds_ibdev->i_cache_gc_work,
+                               msecs_to_jiffies(rds_ib_cache_gc_interval * 1000),
+                               "Cache_Garbage_Collection");
 out:
         return ret;
 }
 
+static inline void rds_ib_free_one_frag(struct rds_page_frag *frag, size_t cache_sz)
+{
+        int cache_frag_pages = ceil(cache_sz, PAGE_SIZE);
+
+        frag->f_cache_entry.next = NULL;
+        WARN_ON(!list_empty(&frag->f_item));
+        rds_ib_recv_free_frag(frag, cache_frag_pages);
+        atomic_sub(cache_frag_pages, &rds_ib_allocation);
+        kmem_cache_free(rds_ib_frag_slab, frag);
+        rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
+        rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
+}
+
+static void rds_ib_free_frag_cache_one(struct rds_ib_refill_cache *cache, size_t cache_sz, int cpu)
+{
+        struct lfstack_el *cache_item;
+        struct rds_page_frag *frag;
+        struct rds_ib_cache_head *head = per_cpu_ptr(cache->percpu, cpu);
+
+        trace_rds_ib_free_cache_one(head, cpu, "frag(s)");
+        while ((cache_item = lfstack_pop(&head->stack))) {
+                atomic_dec(&head->count);
+                frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
+                rds_ib_free_one_frag(frag, cache_sz);
+        }
+}
+
 static void rds_ib_free_frag_cache(struct rds_ib_refill_cache *cache, size_t cache_sz)
 {
-        struct rds_ib_cache_head *head;
         int cpu;
+        struct rds_ib_cache_head *head;
         struct lfstack_el *cache_item;
         struct rds_page_frag *frag;
-        int cache_frag_pages = ceil(cache_sz, PAGE_SIZE);
 
         for_each_possible_cpu(cpu) {
+                rds_ib_free_frag_cache_one(cache, cache_sz, cpu);
                 head = per_cpu_ptr(cache->percpu, cpu);
-                while ((cache_item = lfstack_pop(&head->stack))) {
-                        frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
-                        frag->f_cache_entry.next = NULL;
-                        WARN_ON(!list_empty(&frag->f_item));
-                        rds_ib_recv_free_frag(frag, cache_frag_pages);
-                        atomic_sub(cache_frag_pages, &rds_ib_allocation);
-                        kmem_cache_free(rds_ib_frag_slab, frag);
-                        rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
-                        rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
-                }
                 lfstack_free(&head->stack);
                 atomic_set(&head->count, 0);
         }
         while ((cache_item = lfstack_pop(&cache->ready))) {
                 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
-                frag->f_cache_entry.next = NULL;
-                WARN_ON(!list_empty(&frag->f_item));
-                rds_ib_recv_free_frag(frag, cache_frag_pages);
-                atomic_sub(cache_frag_pages, &rds_ib_allocation);
-                kmem_cache_free(rds_ib_frag_slab, frag);
-                rds_ib_stats_inc(s_ib_recv_nmb_removed_from_cache);
-                rds_ib_stats_add(s_ib_recv_removed_from_cache, cache_sz);
+                rds_ib_free_one_frag(frag, cache_sz);
         }
         lfstack_free(&cache->ready);
         free_percpu(cache->percpu);
 }
 
+static inline void rds_ib_free_one_inc(struct rds_ib_incoming *inc)
+{
+        inc->ii_cache_entry.next = 0;
+        WARN_ON(!list_empty(&inc->ii_frags));
+        kmem_cache_free(rds_ib_incoming_slab, inc);
+}
+
+static void rds_ib_free_inc_cache_one(struct rds_ib_refill_cache *cache, int cpu)
+{
+        struct lfstack_el *cache_item;
+        struct rds_ib_incoming *inc;
+        struct rds_ib_cache_head *head = per_cpu_ptr(cache->percpu, cpu);
+
+        trace_rds_ib_free_cache_one(head, cpu, "inc(s)");
+        while ((cache_item = lfstack_pop(&head->stack))) {
+                atomic_dec(&head->count);
+                inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
+                rds_ib_free_one_inc(inc);
+        }
+}
+
 static void rds_ib_free_inc_cache(struct rds_ib_refill_cache *cache)
 {
         struct rds_ib_cache_head *head;
@@ -358,21 +402,14 @@ static void rds_ib_free_inc_cache(struct rds_ib_refill_cache *cache)
         struct rds_ib_incoming *inc;
 
         for_each_possible_cpu(cpu) {
+                rds_ib_free_inc_cache_one(cache, cpu);
                 head = per_cpu_ptr(cache->percpu, cpu);
-                while ((cache_item = lfstack_pop(&head->stack))) {
-                        inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
-                        inc->ii_cache_entry.next = 0;
-                        WARN_ON(!list_empty(&inc->ii_frags));
-                        kmem_cache_free(rds_ib_incoming_slab, inc);
-                }
                 lfstack_free(&head->stack);
                 atomic_set(&head->count, 0);
         }
         while ((cache_item = lfstack_pop(&cache->ready))) {
                 inc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
-                inc->ii_cache_entry.next = 0;
-                WARN_ON(!list_empty(&inc->ii_frags));
-                kmem_cache_free(rds_ib_incoming_slab, inc);
+                rds_ib_free_one_inc(inc);
         }
         lfstack_free(&cache->ready);
         free_percpu(cache->percpu);
@@ -382,12 +419,51 @@ static void rds_ib_free_caches(struct rds_ib_device *rds_ibdev)
 {
         int i;
 
+        cancel_delayed_work(&rds_ibdev->i_cache_gc_work);
         rds_ib_free_inc_cache(&rds_ibdev->i_cache_incs);
-        for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++) {
-                size_t cache_sz = (1 << i) * PAGE_SIZE;
+        for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++)
+                rds_ib_free_frag_cache(rds_ibdev->i_cache_frags + i, PAGE_SIZE << i);
+}
+
+static bool rds_ib_cache_need_gc(struct rds_ib_refill_cache *cache, int cpu)
+{
+        struct rds_ib_cache_head *head;
+        u64 nmbr;
+        bool ret;
 
-                rds_ib_free_frag_cache(rds_ibdev->i_cache_frags + i, cache_sz);
+        head = per_cpu_ptr(cache->percpu, cpu);
+        nmbr = atomic64_read(&head->miss_count) + atomic64_read(&head->hit_count);
+
+        ret = (atomic64_read(&head->gc_count) == nmbr && atomic_read(&head->count) > 0);
+        atomic64_set(&head->gc_count, nmbr);
+        return ret;
+}
+
+static void rds_ib_cache_gc_worker(struct work_struct *work)
+{
+        int i, j;
+        int nmbr_to_check = num_possible_cpus() / 2;
+        struct rds_ib_device *rds_ibdev = container_of(work,
+                                                       struct rds_ib_device,
+                                                       i_cache_gc_work.work);
+
+        for (j = 0; j < nmbr_to_check; j++) {
+                if (rds_ib_cache_need_gc(&rds_ibdev->i_cache_incs, rds_ibdev->i_cache_gc_cpu))
+                        rds_ib_free_inc_cache_one(&rds_ibdev->i_cache_incs, rds_ibdev->i_cache_gc_cpu);
+
+                for (i = 0; i < RDS_FRAG_CACHE_ENTRIES; i++)
+                        if (rds_ib_cache_need_gc(rds_ibdev->i_cache_frags + i, rds_ibdev->i_cache_gc_cpu))
+                                rds_ib_free_frag_cache_one(rds_ibdev->i_cache_frags + i,
+                                                           PAGE_SIZE << i,
+                                                           rds_ibdev->i_cache_gc_cpu);
+
+                if (++rds_ibdev->i_cache_gc_cpu >= num_possible_cpus())
+                        rds_ibdev->i_cache_gc_cpu = 0;
         }
+
+        rds_queue_delayed_work(NULL, rds_aux_wq, &rds_ibdev->i_cache_gc_work,
+                               msecs_to_jiffies(rds_ib_cache_gc_interval * 1000),
+                               "Cache_Garbage_Collection");
 }
 
 /* Reference counter for struct rds_ib_device on the module */
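
A note on cadence: rds_ib_cache_gc_worker() visits only num_possible_cpus() / 2 CPUs per run before re-arming itself, so a full sweep of every per-cpu cache takes two GC intervals, and a given CPU's cache must sit untouched across the two intervals between visits before it is drained. The sketch below walks the same round-robin cursor arithmetic in stand-alone C with assumed values (8 CPUs, the default 20-second interval from RDS_IB_DEFAULT_CACHE_GC_INTERVAL); it is illustrative only:

/* Round-robin scan cadence of the GC worker (illustrative model). */
#include <stdio.h>

int main(void)
{
        int ncpus = 8, cursor = 0, interval = 20;  /* assumed CPU count; default interval 20 s */
        int per_run = ncpus / 2;                   /* half the CPUs per worker run */

        for (int run = 1; run <= 2; run++) {
                printf("run %d (t=%ds): cpus", run, run * interval);
                for (int j = 0; j < per_run; j++) {
                        printf(" %d", cursor);
                        if (++cursor >= ncpus)     /* wrap, like i_cache_gc_cpu */
                                cursor = 0;
                }
                printf("\n");
        }
        /* Output:
         *   run 1 (t=20s): cpus 0 1 2 3
         *   run 2 (t=40s): cpus 4 5 6 7
         */
        return 0;
}

Splitting the scan this way bounds the work done per invocation while still guaranteeing that every per-cpu cache is inspected within a fixed number of intervals.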

net/rds/ib.h

Lines changed: 5 additions & 0 deletions
@@ -34,6 +34,8 @@
 
 #define RDS_IB_DEFAULT_RNR_RETRY_COUNT 7
 
+#define RDS_IB_DEFAULT_CACHE_GC_INTERVAL 20
+
 #define RDS_IB_DEFAULT_NUM_ARPS 100
 
 #define RDS_IB_RX_LIMIT 10000
@@ -107,6 +109,7 @@ struct rds_ib_cache_head {
         atomic_t count;
         atomic64_t hit_count;
         atomic64_t miss_count;
+        atomic64_t gc_count;
 };
 
 struct rds_ib_refill_cache {
@@ -484,6 +487,8 @@ struct rds_ib_device {
         struct mutex free_dev_lock;
         struct rds_ib_refill_cache i_cache_incs;
         struct rds_ib_refill_cache i_cache_frags[RDS_FRAG_CACHE_ENTRIES];
+        struct delayed_work i_cache_gc_work;
+        int i_cache_gc_cpu;
         struct dentry *debugfs_dir;
 
         atomic_t rid_refcount;

net/rds/trace.c

Lines changed: 1 addition & 0 deletions
@@ -70,3 +70,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(rds_tcp_accept_err);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rds_tcp_listen);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rds_tcp_listen_err);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rds_tcp_shutdown);
+EXPORT_TRACEPOINT_SYMBOL_GPL(rds_ib_free_cache_one);

net/rds/trace.h

Lines changed: 24 additions & 0 deletions
@@ -1461,6 +1461,30 @@ DEFINE_EVENT(rds_tcp, rds_tcp_shutdown,
         TP_ARGS(conn, cp, tc, sk, reason, err)
 );
 
+TRACE_EVENT(rds_ib_free_cache_one,
+
+        TP_PROTO(struct rds_ib_cache_head *chead,
+                 int cpu,
+                 char *type),
+
+        TP_ARGS(chead, cpu, type),
+
+        TP_STRUCT__entry(
+                __field(__u16, cpu)
+                __field(__u16, count)
+                __array(char, type, RDS_STRSIZE)
+        ),
+
+        TP_fast_assign(
+                __entry->cpu = cpu;
+                __entry->count = atomic_read(&chead->count);
+                RDS_STRLCPY(__entry->type, type);
+        ),
+
+        TP_printk("RDS/IB: Free %d %s from percpu-%d",
+                  __entry->count, __entry->type, __entry->cpu)
+);
+
 #endif /* _TRACE_RDS_H */
 
 #undef TRACE_INCLUDE_PATH
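
The new tracepoint fires at the start of each drain (see the trace_rds_ib_free_cache_one() calls in net/rds/ib.c) and records the entry count of the per-cpu stack being freed, the CPU, and a type string ("frag(s)" or "inc(s)"). Given the TP_printk format above, a drained frag cache on CPU 3 holding 12 entries would render a trace line like the following (values hypothetical):

        RDS/IB: Free 12 frag(s) from percpu-3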
