@@ -438,14 +438,16 @@ enum {
 };
 /*
  * Used for which event context the event is in.
- *  NMI     = 0
- *  IRQ     = 1
- *  SOFTIRQ = 2
- *  NORMAL  = 3
+ *  TRANSITION = 0
+ *  NMI        = 1
+ *  IRQ        = 2
+ *  SOFTIRQ    = 3
+ *  NORMAL     = 4
  *
  * See trace_recursive_lock() comment below for more details.
  */
 enum {
+	RB_CTX_TRANSITION,
 	RB_CTX_NMI,
 	RB_CTX_IRQ,
 	RB_CTX_SOFTIRQ,
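For orientation, the sketch below is an illustration and not part of the patch: RB_CTX_TRANSITION takes value 0 and pushes every other context up by one, so a single nesting level of context tracking now needs five bits instead of four. The RB_CTX_NORMAL and RB_CTX_MAX entries are assumed from the enum's continuation, which is cut off in the hunk above.

/* Illustration only: the patched context enum and the bit each value maps to. */
enum {
	RB_CTX_TRANSITION,	/* bit 0 - absorbs one context-switch false positive */
	RB_CTX_NMI,		/* bit 1 */
	RB_CTX_IRQ,		/* bit 2 */
	RB_CTX_SOFTIRQ,		/* bit 3 */
	RB_CTX_NORMAL,		/* bit 4 (assumed continuation of the enum) */
	RB_CTX_MAX		/* 5: one nesting level now spans five bits */
};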
@@ -3014,10 +3016,10 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * a bit of overhead in something as critical as function tracing,
  * we use a bitmask trick.
  *
- *  bit 0 =  NMI context
- *  bit 1 =  IRQ context
- *  bit 2 =  SoftIRQ context
- *  bit 3 =  normal context.
+ *  bit 1 =  NMI context
+ *  bit 2 =  IRQ context
+ *  bit 3 =  SoftIRQ context
+ *  bit 4 =  normal context.
  *
  * This works because this is the order of contexts that can
  * preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -3040,6 +3042,30 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The least significant bit can be cleared this way, and it
  * just so happens that it is the same bit corresponding to
  * the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared, even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while the NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSITION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between contexts.
  */
 
 static __always_inline int
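To make the bitmask trick and the extra TRANSITION level concrete, here is a minimal user-space model of the lock/unlock logic described in the comment above. It is a sketch under simplifying assumptions (no nest offset, no per-CPU structure), not the kernel code itself; the names are reused only for readability.

#include <stdio.h>

enum { RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, RB_CTX_NORMAL };

static unsigned int current_context;	/* stand-in for cpu_buffer->current_context */

static int recursive_lock(int bit)
{
	unsigned int val = current_context;

	if (val & (1 << bit)) {
		/* Same context seen twice: may be an interrupt that fired
		 * before preempt_count() was updated, so allow one more
		 * level via the TRANSITION bit. */
		bit = RB_CTX_TRANSITION;
		if (val & (1 << bit))
			return 1;	/* genuine recursion: reject */
	}
	current_context = val | (1 << bit);
	return 0;
}

static void recursive_unlock(void)
{
	/* val &= val - 1 clears the least significant set bit, which is
	 * always the innermost (most recently entered) context. */
	current_context &= current_context - 1;
}

int main(void)
{
	recursive_lock(RB_CTX_NORMAL);			/* normal context enters    */
	printf("%d\n", recursive_lock(RB_CTX_NORMAL));	/* 0: TRANSITION absorbs it */
	printf("%d\n", recursive_lock(RB_CTX_NORMAL));	/* 1: second hit is blocked */
	recursive_unlock();				/* clears TRANSITION (bit 0) */
	recursive_unlock();				/* clears NORMAL (bit 4)     */
	return 0;
}

Run as-is, this prints 0 then 1: the first re-entry at the same apparent context is absorbed by the TRANSITION bit, and the second is rejected as real recursion.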
@@ -3055,8 +3081,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
-		return 1;
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+		/*
+		 * It is possible that this was called by transitioning
+		 * between interrupt context, and preempt_count() has not
+		 * been updated yet. In this case, use the TRANSITION bit.
+		 */
+		bit = RB_CTX_TRANSITION;
+		if (val & (1 << (bit + cpu_buffer->nest)))
+			return 1;
+	}
 
 	val |= (1 << (bit + cpu_buffer->nest));
 	cpu_buffer->current_context = val;
@@ -3071,8 +3105,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 		cpu_buffer->current_context - (1 << cpu_buffer->nest);
 }
 
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
 
 /**
  * ring_buffer_nest_start - Allow to trace while nested
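The NESTED_BITS bump from 4 to 5 follows directly from the enum change: cpu_buffer->nest offsets the whole group of context bits (as seen in the 1 << (bit + cpu_buffer->nest) checks above), so a nested tracing window can track its own set of contexts, and that group now includes the TRANSITION bit. A rough sketch of that relationship, using simplified stand-ins rather than the kernel's exact fields:

/* Illustration only: with a five-bit context group, a nest offset of
 * NESTED_BITS moves the same contexts from bits 0-4 to bits 5-9. */
#define NESTED_BITS	5
#define NESTED_MASK	((1u << NESTED_BITS) - 1)	/* 0x1f */

static inline unsigned int ctx_mask(unsigned int bit, unsigned int nest)
{
	return 1u << (bit + nest);	/* mirrors (1 << (bit + cpu_buffer->nest)) */
}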