Commit bfbe5ba (merge of parents 7ece54a and 491847f)

Merge branch 'ptr_ring-fixes'

Michael S. Tsirkin says:

====================
ptr_ring fixes

This fixes a bunch of issues around ptr_ring use in net core. One of these,
"tap: fix use-after-free", is also needed on net, but can't be backported
cleanly; I will post a net patch separately.

Lightly tested - Jason, could you please confirm this addresses the security
issue you saw with ptr_ring? Testing reports would be appreciated too.
====================

Signed-off-by: David S. Miller <[email protected]>
Tested-by: Jason Wang <[email protected]>
Acked-by: Jason Wang <[email protected]>

File tree: 7 files changed, +110 / -45 lines

drivers/net/tap.c

Lines changed: 0 additions & 3 deletions

@@ -330,9 +330,6 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 	if (!q)
 		return RX_HANDLER_PASS;
 
-	if (__ptr_ring_full(&q->ring))
-		goto drop;
-
 	skb_push(skb, ETH_HLEN);
 
 	/* Apply the forward feature mask so that we perform segmentation
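The removed test called __ptr_ring_full() without holding producer_lock, which the updated comment in include/linux/ptr_ring.h (below) now spells out as invalid. The producer path does not need an early fullness check: the locked produce call itself fails when the ring is full, and the caller can drop the packet at that point. A minimal sketch of that pattern, assuming a hypothetical helper; this is not the exact tap_handle_frame() code:

#include <linux/ptr_ring.h>
#include <linux/skbuff.h>

/* Sketch only: queue_skb_or_drop() is an illustrative helper, not tap code.
 * ptr_ring_produce() takes producer_lock and returns -ENOSPC on a full ring,
 * so no separate (and unsynchronized) __ptr_ring_full() test is needed.
 */
static int queue_skb_or_drop(struct ptr_ring *ring, struct sk_buff *skb)
{
	if (ptr_ring_produce(ring, skb)) {
		kfree_skb(skb);	/* ring full: drop instead of queueing */
		return -ENOSPC;
	}
	return 0;
}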

include/linux/ptr_ring.h

Lines changed: 48 additions & 38 deletions

@@ -45,9 +45,10 @@ struct ptr_ring {
 };
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). If ring is ever resized, callers must hold
- * producer_lock - see e.g. ptr_ring_full. Otherwise, if callers don't hold
- * producer_lock, the next call to __ptr_ring_produce may fail.
+ * for example cpu_relax().
+ *
+ * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
+ * see e.g. ptr_ring_full.
  */
 static inline bool __ptr_ring_full(struct ptr_ring *r)
 {
@@ -113,7 +114,7 @@ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
 	smp_wmb();
 
-	r->queue[r->producer++] = ptr;
+	WRITE_ONCE(r->queue[r->producer++], ptr);
 	if (unlikely(r->producer >= r->size))
 		r->producer = 0;
 	return 0;
@@ -169,32 +170,36 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
 	return ret;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
- * If ring is never resized, and if the pointer is merely
- * tested, there's no need to take the lock - see e.g. __ptr_ring_empty.
- * However, if called outside the lock, and if some other CPU
- * consumes ring entries at the same time, the value returned
- * is not guaranteed to be correct.
- * In this case - to avoid incorrectly detecting the ring
- * as empty - the CPU consuming the ring entries is responsible
- * for either consuming all ring entries until the ring is empty,
- * or synchronizing with some other CPU and causing it to
- * execute __ptr_ring_peek and/or consume the ring enteries
- * after the synchronization point.
- */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
 	if (likely(r->size))
-		return r->queue[r->consumer_head];
+		return READ_ONCE(r->queue[r->consumer_head]);
 	return NULL;
 }
 
-/* See __ptr_ring_peek above for locking rules. */
+/*
+ * Test ring empty status without taking any locks.
+ *
+ * NB: This is only safe to call if ring is never resized.
+ *
+ * However, if some other CPU consumes ring entries at the same time, the value
+ * returned is not guaranteed to be correct.
+ *
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * re-test __ptr_ring_empty and/or consume the ring enteries
+ * after the synchronization point.
+ *
+ * Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax().
+ */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
-	return !__ptr_ring_peek(r);
+	if (likely(r->size))
+		return !r->queue[READ_ONCE(r->consumer_head)];
+	return true;
 }
 
 static inline bool ptr_ring_empty(struct ptr_ring *r)
@@ -248,35 +253,43 @@ static inline void __ptr_ring_discard_one(struct ptr_ring *r)
 	/* Fundamentally, what we want to do is update consumer
 	 * index and zero out the entry so producer can reuse it.
 	 * Doing it naively at each consume would be as simple as:
-	 *       r->queue[r->consumer++] = NULL;
-	 *       if (unlikely(r->consumer >= r->size))
-	 *               r->consumer = 0;
+	 *       consumer = r->consumer;
+	 *       r->queue[consumer++] = NULL;
+	 *       if (unlikely(consumer >= r->size))
+	 *               consumer = 0;
+	 *       r->consumer = consumer;
 	 * but that is suboptimal when the ring is full as producer is writing
 	 * out new entries in the same cache line.  Defer these updates until a
 	 * batch of entries has been consumed.
 	 */
-	int head = r->consumer_head++;
+	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
+	 * to work correctly.
+	 */
+	int consumer_head = r->consumer_head;
+	int head = consumer_head++;
 
 	/* Once we have processed enough entries invalidate them in
 	 * the ring all at once so producer can reuse their space in the ring.
 	 * We also do this when we reach end of the ring - not mandatory
 	 * but helps keep the implementation simple.
 	 */
-	if (unlikely(r->consumer_head - r->consumer_tail >= r->batch ||
-		     r->consumer_head >= r->size)) {
+	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
+		     consumer_head >= r->size)) {
 		/* Zero out entries in the reverse order: this way we touch the
 		 * cache line that producer might currently be reading the last;
 		 * producer won't make progress and touch other cache lines
 		 * besides the first one until we write out all entries.
 		 */
 		while (likely(head >= r->consumer_tail))
 			r->queue[head--] = NULL;
-		r->consumer_tail = r->consumer_head;
+		r->consumer_tail = consumer_head;
 	}
-	if (unlikely(r->consumer_head >= r->size)) {
-		r->consumer_head = 0;
+	if (unlikely(consumer_head >= r->size)) {
+		consumer_head = 0;
 		r->consumer_tail = 0;
 	}
+	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+	WRITE_ONCE(r->consumer_head, consumer_head);
 }
 
 static inline void *__ptr_ring_consume(struct ptr_ring *r)
@@ -453,12 +466,7 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
-	/* Allocate an extra dummy element at end of ring to avoid consumer head
-	 * or produce head access past the end of the array. Possible when
-	 * producer/consumer operations and __ptr_ring_peek operations run in
-	 * parallel.
-	 */
-	return kcalloc(size + 1, sizeof(void *), gfp);
+	return kcalloc(size, sizeof(void *), gfp);
 }
 
 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -532,7 +540,9 @@ static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
 			goto done;
 		}
 		r->queue[head] = batch[--n];
-		r->consumer_tail = r->consumer_head = head;
+		r->consumer_tail = head;
+		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
+		WRITE_ONCE(r->consumer_head, head);
 	}
 
 done:
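Taken together, these changes draw a clear line: __ptr_ring_empty() may be called without consumer_lock, provided the ring is never resized, because the consumer side now publishes consumer_head with WRITE_ONCE() and always keeps it pointing at a valid slot, pairing with the READ_ONCE() in the empty test. Anything that dereferences or consumes entries still needs consumer_lock. A short sketch of the intended usage, assuming a polling consumer; the function name and spin-wait are illustrative, not code from this commit:

#include <linux/ptr_ring.h>

/* wait_for_entry() is a hypothetical helper illustrating the documented rules.
 * The ring must never be resized while this runs, or the lockless test below
 * is not safe.
 */
static void *wait_for_entry(struct ptr_ring *r)
{
	/* Lockless: pairs with WRITE_ONCE(r->consumer_head, ...) in
	 * __ptr_ring_discard_one(); the loop needs a compiler barrier.
	 */
	while (__ptr_ring_empty(r))
		cpu_relax();

	/* Consuming (and dereferencing) entries still takes consumer_lock;
	 * this may return NULL if another consumer raced us to the entry.
	 */
	return ptr_ring_consume(r);
}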

include/linux/skb_array.h

Lines changed: 1 addition & 1 deletion

@@ -69,7 +69,7 @@ static inline int skb_array_produce_any(struct skb_array *a, struct sk_buff *skb)
  */
 static inline bool __skb_array_empty(struct skb_array *a)
 {
-	return !__ptr_ring_peek(&a->ring);
+	return __ptr_ring_empty(&a->ring);
 }
 
 static inline struct sk_buff *__skb_array_peek(struct skb_array *a)

tools/virtio/linux/kernel.h

Lines changed: 1 addition & 1 deletion

@@ -118,7 +118,7 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
 
 #define min(x, y) ({ \
 	typeof(x) _min1 = (x); \
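The behavioural difference between the two WARN_ON_ONCE() stubs is small and can be seen in a standalone test (hypothetical, not part of the tools build): both print on every true evaluation, but the old && form yields only 0 or 1, while the conditional form yields fprintf()'s return value or 0.

#include <stdio.h>

/* Standalone illustration of the two stub forms; the macro names here are
 * renamed copies, not the ones from kernel.h.
 */
#define WARN_AND(cond)  ((cond) && fprintf(stderr, "WARNING\n"))
#define WARN_COND(cond) ((cond) ? fprintf(stderr, "WARNING\n") : 0)

int main(void)
{
	printf("&& form:  %d\n", WARN_AND(1));	/* prints WARNING, yields 1 */
	printf("?: form:  %d\n", WARN_COND(1));	/* prints WARNING, yields 8 */
	printf("false:    %d %d\n", WARN_AND(0), WARN_COND(0));	/* 0 0, no print */
	return 0;
}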

tools/virtio/linux/thread_info.h (new file)

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)

tools/virtio/ringtest/main.h

Lines changed: 58 additions & 1 deletion

@@ -111,7 +111,7 @@ static inline void busy_wait(void)
 }
 
 #if defined(__x86_64__) || defined(__i386__)
-#define smp_mb() asm volatile("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -134,4 +134,61 @@ static inline void busy_wait(void)
 	barrier(); \
 } while (0)
 
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+	switch (size) { \
+	case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break; \
+	case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break; \
+	case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break; \
+	case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break; \
+	default: \
+		barrier(); \
+		__builtin_memcpy((void *)res, (const void *)p, size); \
+		barrier(); \
+	} \
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		barrier();
+	}
+}
+
+#define READ_ONCE(x) \
+({ \
+	union { typeof(x) __val; char __c[1]; } __u; \
+	__read_once_size(&(x), __u.__c, sizeof(x)); \
+	smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+	__u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+	union { typeof(x) __val; char __c[1]; } __u = \
+		{ .__val = (typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x)); \
+	__u.__val; \
+})
+
 #endif
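The READ_ONCE()/WRITE_ONCE() and barrier stubs added here let the ptr_ring code above compile and keep the same single-copy-atomicity guarantees when built into the userspace ringtest harness. A sketch of how a test might use them on a single shared slot; the slot variable and the two helpers are illustrative, not part of main.h:

/* Illustrative use of the userspace READ_ONCE/WRITE_ONCE stubs above;
 * "slot", produce_one() and consume_one() are hypothetical, not ringtest code.
 */
static void *slot;

static void produce_one(void *p)
{
	/* Single atomic store the consumer may observe without a lock. */
	WRITE_ONCE(slot, p);
}

static void *consume_one(void)
{
	void *p;

	/* READ_ONCE stops the compiler from tearing or re-reading slot;
	 * smp_read_barrier_depends() inside the macro orders dependent
	 * loads on Alpha.
	 */
	while (!(p = READ_ONCE(slot)))
		busy_wait();
	WRITE_ONCE(slot, NULL);
	return p;
}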

tools/virtio/ringtest/ptr_ring.c

Lines changed: 1 addition & 1 deletion

@@ -187,7 +187,7 @@ bool enable_kick()
 
 bool avail_empty()
 {
-	return !__ptr_ring_peek(&array);
+	return __ptr_ring_empty(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
