@@ -410,8 +410,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
 	.arg4_type	= ARG_CONST_SIZE,
 };
 
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
 static __always_inline u64
 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 			u64 flags, struct perf_sample_data *sd)
@@ -442,24 +440,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	return perf_event_output(event, sd, regs);
 }
 
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+	struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags, void *, data, u64, size)
 {
-	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+	struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+	int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
 			.data = data,
 		},
 	};
+	struct perf_sample_data *sd;
+	int err;
 
-	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
-		return -EINVAL;
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	sd = &sds->sds[nest_level - 1];
+
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	perf_sample_data_init(sd, 0, 0);
 	sd->raw = &raw;
 
-	return __bpf_perf_event_output(regs, map, flags, sd);
+	err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+	this_cpu_dec(bpf_trace_nest_level);
+	return err;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto = {
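
The hunk above replaces the single per-CPU perf_sample_data with three per-CPU slots plus a nesting counter, so a program that fires in task context and is then interrupted by the same tracepoint in irq or nmi context no longer overwrites a sample that is still being filled in. Stripped of the bpf_perf_event_output specifics, the guard pattern looks roughly like the sketch below; the example_* names are illustrative only and not part of the patch.

#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>

/* One buffer per context level that can nest on a CPU: task, irq, nmi. */
struct example_sample_bufs {
	struct perf_sample_data sds[3];
};

static DEFINE_PER_CPU(struct example_sample_bufs, example_bufs);
static DEFINE_PER_CPU(int, example_nest_level);

static struct perf_sample_data *example_get_sample_buf(void)
{
	struct example_sample_bufs *bufs = this_cpu_ptr(&example_bufs);
	int nest_level = this_cpu_inc_return(example_nest_level);

	/* Nesting deeper than task -> irq -> nmi is unexpected; back out. */
	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->sds))) {
		this_cpu_dec(example_nest_level);
		return NULL;
	}
	return &bufs->sds[nest_level - 1];
}

static void example_put_sample_buf(void)
{
	this_cpu_dec(example_nest_level);
}

In bpf_perf_event_output itself the same dance is open-coded with a goto out label, so the counter is decremented on every exit path.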
@@ -822,16 +846,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
  */
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+	struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+		this_cpu_dec(bpf_raw_tp_nest_level);
+		return ERR_PTR(-EBUSY);
+	}
+
+	return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+	this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
-	return ____bpf_perf_event_output(regs, map, flags, data, size);
+	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
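
For raw tracepoints the same three-slot idea is factored into an acquire/release pair, get_bpf_raw_tp_regs() and put_bpf_raw_tp_regs(), because several helpers share the per-CPU pt_regs storage; on nesting overflow the getter returns ERR_PTR(-EBUSY), which callers decode with IS_ERR()/PTR_ERR(). Every caller in this file, including bpf_get_stackid_raw_tp and bpf_get_stack_raw_tp in the hunks below, follows the shape sketched here (example_raw_tp_helper is a made-up name for illustration, not new kernel API):

/* Acquire a per-CPU pt_regs slot, do the work, always release the slot. */
static int example_raw_tp_helper(struct bpf_map *map, u64 flags,
				 void *data, u64 size)
{
	struct pt_regs *regs = get_bpf_raw_tp_regs();
	int ret;

	if (IS_ERR(regs))
		return PTR_ERR(regs);	/* -EBUSY when nested more than three deep */

	perf_fetch_caller_regs(regs);	/* snapshot the caller's registers */
	ret = ____bpf_perf_event_output(regs, map, flags, data, size);

	put_bpf_raw_tp_regs();		/* drop the nesting level on every path */
	return ret;
}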
@@ -848,12 +904,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
 	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
-	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
-			       flags, 0, 0);
+	ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			      flags, 0, 0);
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -868,11 +930,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   void *, buf, u32, size, u64, flags)
 {
-	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+	struct pt_regs *regs = get_bpf_raw_tp_regs();
+	int ret;
+
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	perf_fetch_caller_regs(regs);
-	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
-			     (unsigned long) size, flags, 0);
+	ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			    (unsigned long) size, flags, 0);
+	put_bpf_raw_tp_regs();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {