Skip to content

Commit fdf4578

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf: Avoid unnecessary deadlock detection and failure in task storage'
Martin KaFai Lau says: ==================== From: Martin KaFai Lau <[email protected]> The commit bc235cd ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]") added deadlock detection to prevent a tracing program from recursing into the bpf_task_storage_{get,delete}() helpers. These helpers acquire a spin lock, so recursion would lead to deadlock. The detection is unnecessary for bpf_lsm and bpf_iter programs, which do not recurse. The situation is the same as for the existing bpf_pid_task_storage_{lookup,delete}_elem(), which are used in the syscall path and also have no deadlock detection. This set adds new bpf_task_storage_{get,delete}() helper protos without the deadlock detection. The set also removes the prog->active check from the bpf_lsm and bpf_iter programs. Please see the individual patches for details. ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents f3c51fe + 387b532 commit fdf4578

File tree

12 files changed

+431
-84
lines changed

12 files changed

+431
-84
lines changed

arch/arm64/net/bpf_jit_comp.c

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,13 +1649,8 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
16491649
struct bpf_prog *p = l->link.prog;
16501650
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
16511651

1652-
if (p->aux->sleepable) {
1653-
enter_prog = (u64)__bpf_prog_enter_sleepable;
1654-
exit_prog = (u64)__bpf_prog_exit_sleepable;
1655-
} else {
1656-
enter_prog = (u64)__bpf_prog_enter;
1657-
exit_prog = (u64)__bpf_prog_exit;
1658-
}
1652+
enter_prog = (u64)bpf_trampoline_enter(p);
1653+
exit_prog = (u64)bpf_trampoline_exit(p);
16591654

16601655
if (l->cookie == 0) {
16611656
/* if cookie is zero, one instruction is enough to store it */

arch/x86/net/bpf_jit_comp.c

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,10 +1894,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
18941894
struct bpf_tramp_link *l, int stack_size,
18951895
int run_ctx_off, bool save_ret)
18961896
{
1897-
void (*exit)(struct bpf_prog *prog, u64 start,
1898-
struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
1899-
u64 (*enter)(struct bpf_prog *prog,
1900-
struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
19011897
u8 *prog = *pprog;
19021898
u8 *jmp_insn;
19031899
int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
@@ -1916,23 +1912,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
19161912
*/
19171913
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
19181914

1919-
if (p->aux->sleepable) {
1920-
enter = __bpf_prog_enter_sleepable;
1921-
exit = __bpf_prog_exit_sleepable;
1922-
} else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) {
1923-
enter = __bpf_prog_enter_struct_ops;
1924-
exit = __bpf_prog_exit_struct_ops;
1925-
} else if (p->expected_attach_type == BPF_LSM_CGROUP) {
1926-
enter = __bpf_prog_enter_lsm_cgroup;
1927-
exit = __bpf_prog_exit_lsm_cgroup;
1928-
}
1929-
19301915
/* arg1: mov rdi, progs[i] */
19311916
emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
19321917
/* arg2: lea rsi, [rbp - ctx_cookie_off] */
19331918
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
19341919

1935-
if (emit_call(&prog, enter, prog))
1920+
if (emit_call(&prog, bpf_trampoline_enter(p), prog))
19361921
return -EINVAL;
19371922
/* remember prog start time returned by __bpf_prog_enter */
19381923
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -1977,7 +1962,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
19771962
emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
19781963
/* arg3: lea rdx, [rbp - run_ctx_off] */
19791964
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
1980-
if (emit_call(&prog, exit, prog))
1965+
if (emit_call(&prog, bpf_trampoline_exit(p), prog))
19811966
return -EINVAL;
19821967

19831968
*pprog = prog;

include/linux/bpf.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -854,22 +854,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
854854
const struct btf_func_model *m, u32 flags,
855855
struct bpf_tramp_links *tlinks,
856856
void *orig_call);
857-
/* these two functions are called from generated trampoline */
858-
u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
859-
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
860-
u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
861-
void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
862-
struct bpf_tramp_run_ctx *run_ctx);
863-
u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
864-
struct bpf_tramp_run_ctx *run_ctx);
865-
void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
866-
struct bpf_tramp_run_ctx *run_ctx);
867-
u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
868-
struct bpf_tramp_run_ctx *run_ctx);
869-
void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
870-
struct bpf_tramp_run_ctx *run_ctx);
857+
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
858+
struct bpf_tramp_run_ctx *run_ctx);
859+
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
860+
struct bpf_tramp_run_ctx *run_ctx);
871861
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
872862
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
863+
typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog,
864+
struct bpf_tramp_run_ctx *run_ctx);
865+
typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
866+
struct bpf_tramp_run_ctx *run_ctx);
867+
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
868+
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
873869

874870
struct bpf_ksym {
875871
unsigned long start;
@@ -2523,7 +2519,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
25232519
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
25242520
extern const struct bpf_func_proto bpf_sock_from_file_proto;
25252521
extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
2522+
extern const struct bpf_func_proto bpf_task_storage_get_recur_proto;
25262523
extern const struct bpf_func_proto bpf_task_storage_get_proto;
2524+
extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto;
25272525
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
25282526
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
25292527
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;

include/linux/bpf_verifier.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,10 +642,23 @@ static inline u32 type_flag(u32 type)
642642
}
643643

644644
/* only use after check_attach_btf_id() */
645-
static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
645+
static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog)
646646
{
647647
return prog->type == BPF_PROG_TYPE_EXT ?
648648
prog->aux->dst_prog->type : prog->type;
649649
}
650650

651+
static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
652+
{
653+
switch (resolve_prog_type(prog)) {
654+
case BPF_PROG_TYPE_TRACING:
655+
return prog->expected_attach_type != BPF_TRACE_ITER;
656+
case BPF_PROG_TYPE_STRUCT_OPS:
657+
case BPF_PROG_TYPE_LSM:
658+
return false;
659+
default:
660+
return true;
661+
}
662+
}
663+
651664
#endif /* _LINUX_BPF_VERIFIER_H */

kernel/bpf/bpf_local_storage.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
242242
__bpf_selem_unlink_storage(selem, use_trace_rcu);
243243
}
244244

245+
/* If cacheit_lockit is false, this lookup function is lockless */
245246
struct bpf_local_storage_data *
246247
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
247248
struct bpf_local_storage_map *smap,

kernel/bpf/bpf_task_storage.c

Lines changed: 94 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -184,14 +184,18 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
184184
return err;
185185
}
186186

187-
static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
187+
static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
188+
bool nobusy)
188189
{
189190
struct bpf_local_storage_data *sdata;
190191

191192
sdata = task_storage_lookup(task, map, false);
192193
if (!sdata)
193194
return -ENOENT;
194195

196+
if (!nobusy)
197+
return -EBUSY;
198+
195199
bpf_selem_unlink(SELEM(sdata), true);
196200

197201
return 0;
@@ -220,63 +224,108 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
220224
}
221225

222226
bpf_task_storage_lock();
223-
err = task_storage_delete(task, map);
227+
err = task_storage_delete(task, map, true);
224228
bpf_task_storage_unlock();
225229
out:
226230
put_pid(pid);
227231
return err;
228232
}
229233

230-
/* *gfp_flags* is a hidden argument provided by the verifier */
231-
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
232-
task, void *, value, u64, flags, gfp_t, gfp_flags)
234+
/* Called by bpf_task_storage_get*() helpers */
235+
static void *__bpf_task_storage_get(struct bpf_map *map,
236+
struct task_struct *task, void *value,
237+
u64 flags, gfp_t gfp_flags, bool nobusy)
233238
{
234239
struct bpf_local_storage_data *sdata;
235240

236-
WARN_ON_ONCE(!bpf_rcu_lock_held());
237-
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
238-
return (unsigned long)NULL;
239-
240-
if (!task)
241-
return (unsigned long)NULL;
242-
243-
if (!bpf_task_storage_trylock())
244-
return (unsigned long)NULL;
245-
246-
sdata = task_storage_lookup(task, map, true);
241+
sdata = task_storage_lookup(task, map, nobusy);
247242
if (sdata)
248-
goto unlock;
243+
return sdata->data;
249244

250245
/* only allocate new storage, when the task is refcounted */
251246
if (refcount_read(&task->usage) &&
252-
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
247+
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
253248
sdata = bpf_local_storage_update(
254249
task, (struct bpf_local_storage_map *)map, value,
255250
BPF_NOEXIST, gfp_flags);
251+
return IS_ERR(sdata) ? NULL : sdata->data;
252+
}
253+
254+
return NULL;
255+
}
256256

257-
unlock:
257+
/* *gfp_flags* is a hidden argument provided by the verifier */
258+
BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
259+
task, void *, value, u64, flags, gfp_t, gfp_flags)
260+
{
261+
bool nobusy;
262+
void *data;
263+
264+
WARN_ON_ONCE(!bpf_rcu_lock_held());
265+
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
266+
return (unsigned long)NULL;
267+
268+
nobusy = bpf_task_storage_trylock();
269+
data = __bpf_task_storage_get(map, task, value, flags,
270+
gfp_flags, nobusy);
271+
if (nobusy)
272+
bpf_task_storage_unlock();
273+
return (unsigned long)data;
274+
}
275+
276+
/* *gfp_flags* is a hidden argument provided by the verifier */
277+
BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
278+
task, void *, value, u64, flags, gfp_t, gfp_flags)
279+
{
280+
void *data;
281+
282+
WARN_ON_ONCE(!bpf_rcu_lock_held());
283+
if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
284+
return (unsigned long)NULL;
285+
286+
bpf_task_storage_lock();
287+
data = __bpf_task_storage_get(map, task, value, flags,
288+
gfp_flags, true);
258289
bpf_task_storage_unlock();
259-
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
260-
(unsigned long)sdata->data;
290+
return (unsigned long)data;
261291
}
262292

263-
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
293+
BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
264294
task)
265295
{
296+
bool nobusy;
266297
int ret;
267298

268299
WARN_ON_ONCE(!bpf_rcu_lock_held());
269300
if (!task)
270301
return -EINVAL;
271302

272-
if (!bpf_task_storage_trylock())
273-
return -EBUSY;
303+
nobusy = bpf_task_storage_trylock();
304+
/* This helper must only be called from places where the lifetime of the task
305+
* is guaranteed. Either by being refcounted or by being protected
306+
* by an RCU read-side critical section.
307+
*/
308+
ret = task_storage_delete(task, map, nobusy);
309+
if (nobusy)
310+
bpf_task_storage_unlock();
311+
return ret;
312+
}
313+
314+
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
315+
task)
316+
{
317+
int ret;
318+
319+
WARN_ON_ONCE(!bpf_rcu_lock_held());
320+
if (!task)
321+
return -EINVAL;
274322

323+
bpf_task_storage_lock();
275324
/* This helper must only be called from places where the lifetime of the task
276325
* is guaranteed. Either by being refcounted or by being protected
277326
* by an RCU read-side critical section.
278327
*/
279-
ret = task_storage_delete(task, map);
328+
ret = task_storage_delete(task, map, true);
280329
bpf_task_storage_unlock();
281330
return ret;
282331
}
@@ -322,6 +371,17 @@ const struct bpf_map_ops task_storage_map_ops = {
322371
.map_owner_storage_ptr = task_storage_ptr,
323372
};
324373

374+
const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
375+
.func = bpf_task_storage_get_recur,
376+
.gpl_only = false,
377+
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
378+
.arg1_type = ARG_CONST_MAP_PTR,
379+
.arg2_type = ARG_PTR_TO_BTF_ID,
380+
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
381+
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
382+
.arg4_type = ARG_ANYTHING,
383+
};
384+
325385
const struct bpf_func_proto bpf_task_storage_get_proto = {
326386
.func = bpf_task_storage_get,
327387
.gpl_only = false,
@@ -333,6 +393,15 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
333393
.arg4_type = ARG_ANYTHING,
334394
};
335395

396+
const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
397+
.func = bpf_task_storage_delete_recur,
398+
.gpl_only = false,
399+
.ret_type = RET_INTEGER,
400+
.arg1_type = ARG_CONST_MAP_PTR,
401+
.arg2_type = ARG_PTR_TO_BTF_ID,
402+
.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
403+
};
404+
336405
const struct bpf_func_proto bpf_task_storage_delete_proto = {
337406
.func = bpf_task_storage_delete,
338407
.gpl_only = false,

kernel/bpf/syscall.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5133,13 +5133,14 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
51335133

51345134
run_ctx.bpf_cookie = 0;
51355135
run_ctx.saved_run_ctx = NULL;
5136-
if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
5136+
if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
51375137
/* recursion detected */
51385138
bpf_prog_put(prog);
51395139
return -EBUSY;
51405140
}
51415141
attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
5142-
__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
5142+
__bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
5143+
&run_ctx);
51435144
bpf_prog_put(prog);
51445145
return 0;
51455146
#endif

0 commit comments

Comments
 (0)