Skip to content

Commit d956a04

Browse files
netoptimizer authored and davem330 committed
xdp: force mem allocator removal and periodic warning
If bugs exist or are introduced later, e.g. by drivers misusing the API, then we want to warn about the issue, such that developers notice. This patch will generate a bit of noise in the form of a periodic pr_warn every 30 seconds. It is not nice to have this stall warning running forever. Thus, this patch will (after 120 attempts) force-disconnect the mem id (from the rhashtable) and free the page_pool object. This will cause fallback to put_page() as before, which only potentially leaks DMA mappings, if objects are really stuck for this long. In that unlikely case, a WARN_ONCE should show us the call stack. Signed-off-by: Jesper Dangaard Brouer <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 99c07c4 commit d956a04

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

net/core/page_pool.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,27 @@ static void __page_pool_empty_ring(struct page_pool *pool)
330330
}
331331
}
332332

333+
/* Report pages still "in flight" when a page_pool is being destroyed.
 *
 * The in-flight count is the distance between the hold counter
 * (pages handed out, plain u32 read via READ_ONCE) and the release
 * counter (pages returned, atomic). A non-zero distance at teardown
 * means a driver leaked pages back via put_page() or is holding them
 * too long; emit a WARN so the stack trace identifies the caller.
 *
 * NOTE(review): _distance() is defined elsewhere in this file —
 * presumably a wrap-safe subtraction of the two u32 counters; confirm.
 */
static void __warn_in_flight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 distance;

	distance = _distance(hold_cnt, release_cnt);

	/* Drivers should fix this, but only problematic when DMA is used */
	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
	     distance, hold_cnt, release_cnt);
}
345+
333346
void __page_pool_free(struct page_pool *pool)
334347
{
335348
WARN(pool->alloc.count, "API usage violation");
336349
WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
337-
WARN(!__page_pool_safe_to_destroy(pool), "still in-flight pages");
350+
351+
/* Can happen due to forced shutdown */
352+
if (!__page_pool_safe_to_destroy(pool))
353+
__warn_in_flight(pool);
338354

339355
ptr_ring_cleanup(&pool->ring, NULL);
340356
kfree(pool);

net/core/xdp.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ struct xdp_mem_allocator {
3939
struct rhash_head node;
4040
struct rcu_head rcu;
4141
struct delayed_work defer_wq;
42+
unsigned long defer_start;
43+
unsigned long defer_warn;
44+
int disconnect_cnt;
4245
};
4346

4447
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
@@ -95,7 +98,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
9598
kfree(xa);
9699
}
97100

98-
bool __mem_id_disconnect(int id)
101+
bool __mem_id_disconnect(int id, bool force)
99102
{
100103
struct xdp_mem_allocator *xa;
101104
bool safe_to_remove = true;
@@ -108,29 +111,47 @@ bool __mem_id_disconnect(int id)
108111
WARN(1, "Request remove non-existing id(%d), driver bug?", id);
109112
return true;
110113
}
114+
xa->disconnect_cnt++;
111115

112116
/* Detects in-flight packet-pages for page_pool */
113117
if (xa->mem.type == MEM_TYPE_PAGE_POOL)
114118
safe_to_remove = page_pool_request_shutdown(xa->page_pool);
115119

116-
if (safe_to_remove &&
120+
/* TODO: Tracepoint will be added here in next-patch */
121+
122+
if ((safe_to_remove || force) &&
117123
!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
118124
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
119125

120126
mutex_unlock(&mem_id_lock);
121-
return safe_to_remove;
127+
return (safe_to_remove|force);
122128
}
123129

124130
#define DEFER_TIME (msecs_to_jiffies(1000))
131+
#define DEFER_WARN_INTERVAL (30 * HZ)
132+
#define DEFER_MAX_RETRIES 120
125133

126134
/* Delayed-work handler: retry disconnecting a mem id whose pages are
 * still in flight.
 *
 * Each invocation re-attempts __mem_id_disconnect(). Once the number
 * of attempts exceeds DEFER_MAX_RETRIES, removal is forced — the
 * allocator entry is torn down even with pages outstanding (late
 * returns then fall back to put_page(), see __xdp_return). While the
 * shutdown is stalled, a rate-limited warning is printed every
 * DEFER_WARN_INTERVAL, and the work re-queues itself after DEFER_TIME.
 */
static void mem_id_disconnect_defer_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
	bool force = false;

	/* disconnect_cnt is incremented inside __mem_id_disconnect() */
	if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
		force = true;

	if (__mem_id_disconnect(xa->mem.id, force))
		return;

	/* Periodic warning: elapsed time computed from defer_start,
	 * truncated to u32 so the subtraction is wrap-safe.
	 */
	if (time_after_eq(jiffies, xa->defer_warn)) {
		int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;

		pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
			__func__, xa->mem.id, xa->disconnect_cnt, sec);
		xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
@@ -153,7 +174,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
153174
if (id == 0)
154175
return;
155176

156-
if (__mem_id_disconnect(id))
177+
if (__mem_id_disconnect(id, false))
157178
return;
158179

159180
/* Could not disconnect, defer new disconnect attempt to later */
@@ -164,6 +185,8 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
164185
mutex_unlock(&mem_id_lock);
165186
return;
166187
}
188+
xa->defer_start = jiffies;
189+
xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
167190

168191
INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
169192
mutex_unlock(&mem_id_lock);
@@ -388,10 +411,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
388411
/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
389412
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
390413
page = virt_to_head_page(data);
391-
if (xa) {
414+
if (likely(xa)) {
392415
napi_direct &= !xdp_return_frame_no_direct();
393416
page_pool_put_page(xa->page_pool, page, napi_direct);
394417
} else {
418+
/* Hopefully stack show who to blame for late return */
419+
WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
395420
put_page(page);
396421
}
397422
rcu_read_unlock();

0 commit comments

Comments
 (0)