Skip to content

Commit f38a9eb

Browse files
tgraf authored and davem330 committed
dst: Metadata destinations
Introduces a new dst_metadata which makes it possible to carry per-packet metadata between forwarding and processing elements via the skb->dst pointer. The structure is set up to be a union. Thus, each separate type of metadata requires its own dst instance. If demand arises to carry multiple types of metadata concurrently, metadata dst entries can be made stackable. The metadata dst entry is reference counted as expected for now, but a non-reference-counted use is possible if the reference is forced before queueing the skb. In order to allow allocating dsts with variable length, the existing dst_alloc() is split into a dst_alloc() and a dst_init() function. The existing dst_init() function, which initializes the subsystem, is renamed to dst_subsys_init() to make it clear what is what. The check before ip_route_input() is changed to ignore metadata dsts, and the dst is dropped inside the routing function, thus allowing the metadata to be interpreted in a later commit. Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 773a69d commit f38a9eb

File tree

6 files changed

+112
-17
lines changed

6 files changed

+112
-17
lines changed

include/net/dst.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct dst_entry {
5757
#define DST_FAKE_RTABLE 0x0040
5858
#define DST_XFRM_TUNNEL 0x0080
5959
#define DST_XFRM_QUEUE 0x0100
60+
#define DST_METADATA 0x0200
6061

6162
unsigned short pending_confirm;
6263

@@ -356,6 +357,9 @@ static inline int dst_discard(struct sk_buff *skb)
356357
}
357358
void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
358359
int initial_obsolete, unsigned short flags);
360+
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
361+
struct net_device *dev, int initial_ref, int initial_obsolete,
362+
unsigned short flags);
359363
void __dst_free(struct dst_entry *dst);
360364
struct dst_entry *dst_destroy(struct dst_entry *dst);
361365

@@ -457,7 +461,7 @@ static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie)
457461
return dst;
458462
}
459463

460-
void dst_init(void);
464+
void dst_subsys_init(void);
461465

462466
/* Flags for xfrm_lookup flags argument. */
463467
enum {

include/net/dst_metadata.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#ifndef __NET_DST_METADATA_H
2+
#define __NET_DST_METADATA_H 1
3+
4+
#include <linux/skbuff.h>
5+
#include <net/ip_tunnels.h>
6+
#include <net/dst.h>
7+
8+
struct metadata_dst {
9+
struct dst_entry dst;
10+
size_t opts_len;
11+
};
12+
13+
static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
14+
{
15+
struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb);
16+
17+
if (md_dst && md_dst->dst.flags & DST_METADATA)
18+
return md_dst;
19+
20+
return NULL;
21+
}
22+
23+
static inline bool skb_valid_dst(const struct sk_buff *skb)
24+
{
25+
struct dst_entry *dst = skb_dst(skb);
26+
27+
return dst && !(dst->flags & DST_METADATA);
28+
}
29+
30+
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
31+
32+
#endif /* __NET_DST_METADATA_H */

net/core/dev.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7669,7 +7669,7 @@ static int __init net_dev_init(void)
76697669
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
76707670

76717671
hotcpu_notifier(dev_cpu_callback, 0);
7672-
dst_init();
7672+
dst_subsys_init();
76737673
rc = 0;
76747674
out:
76757675
return rc;

net/core/dst.c

Lines changed: 70 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <linux/prefetch.h>
2323

2424
#include <net/dst.h>
25+
#include <net/dst_metadata.h>
2526

2627
/*
2728
* Theory of operations:
@@ -158,19 +159,10 @@ const u32 dst_default_metrics[RTAX_MAX + 1] = {
158159
[RTAX_MAX] = 0xdeadbeef,
159160
};
160161

161-
162-
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
163-
int initial_ref, int initial_obsolete, unsigned short flags)
162+
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
163+
struct net_device *dev, int initial_ref, int initial_obsolete,
164+
unsigned short flags)
164165
{
165-
struct dst_entry *dst;
166-
167-
if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
168-
if (ops->gc(ops))
169-
return NULL;
170-
}
171-
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
172-
if (!dst)
173-
return NULL;
174166
dst->child = NULL;
175167
dst->dev = dev;
176168
if (dev)
@@ -200,6 +192,25 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
200192
dst->next = NULL;
201193
if (!(flags & DST_NOCOUNT))
202194
dst_entries_add(ops, 1);
195+
}
196+
EXPORT_SYMBOL(dst_init);
197+
198+
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
199+
int initial_ref, int initial_obsolete, unsigned short flags)
200+
{
201+
struct dst_entry *dst;
202+
203+
if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
204+
if (ops->gc(ops))
205+
return NULL;
206+
}
207+
208+
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
209+
if (!dst)
210+
return NULL;
211+
212+
dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
213+
203214
return dst;
204215
}
205216
EXPORT_SYMBOL(dst_alloc);
@@ -248,7 +259,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
248259
dst->ops->destroy(dst);
249260
if (dst->dev)
250261
dev_put(dst->dev);
251-
kmem_cache_free(dst->ops->kmem_cachep, dst);
262+
263+
if (dst->flags & DST_METADATA)
264+
kfree(dst);
265+
else
266+
kmem_cache_free(dst->ops->kmem_cachep, dst);
252267

253268
dst = child;
254269
if (dst) {
@@ -327,6 +342,47 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
327342
}
328343
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
329344

345+
static struct dst_ops md_dst_ops = {
346+
.family = AF_UNSPEC,
347+
};
348+
349+
static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb)
350+
{
351+
WARN_ONCE(1, "Attempting to call output on metadata dst\n");
352+
kfree_skb(skb);
353+
return 0;
354+
}
355+
356+
static int dst_md_discard(struct sk_buff *skb)
357+
{
358+
WARN_ONCE(1, "Attempting to call input on metadata dst\n");
359+
kfree_skb(skb);
360+
return 0;
361+
}
362+
363+
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
364+
{
365+
struct metadata_dst *md_dst;
366+
struct dst_entry *dst;
367+
368+
md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
369+
if (!md_dst)
370+
return ERR_PTR(-ENOMEM);
371+
372+
dst = &md_dst->dst;
373+
dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
374+
DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
375+
376+
dst->input = dst_md_discard;
377+
dst->output = dst_md_discard_sk;
378+
379+
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
380+
md_dst->opts_len = optslen;
381+
382+
return md_dst;
383+
}
384+
EXPORT_SYMBOL_GPL(metadata_dst_alloc);
385+
330386
/* Dirty hack. We did it in 2.2 (in __dst_free),
331387
* we have _very_ good reasons not to repeat
332388
* this mistake in 2.3, but we have no choice
@@ -391,7 +447,7 @@ static struct notifier_block dst_dev_notifier = {
391447
.priority = -10, /* must be called after other network notifiers */
392448
};
393449

394-
void __init dst_init(void)
450+
void __init dst_subsys_init(void)
395451
{
396452
register_netdevice_notifier(&dst_dev_notifier);
397453
}

net/ipv4/ip_input.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
#include <net/xfrm.h>
147147
#include <linux/mroute.h>
148148
#include <linux/netlink.h>
149+
#include <net/dst_metadata.h>
149150

150151
/*
151152
* Process Router Attention IP option (RFC 2113)
@@ -331,7 +332,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
331332
* Initialise the virtual path cache for the packet. It describes
332333
* how the packet travels inside Linux networking.
333334
*/
334-
if (!skb_dst(skb)) {
335+
if (!skb_valid_dst(skb)) {
335336
int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
336337
iph->tos, skb->dev);
337338
if (unlikely(err)) {

net/ipv4/route.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
16901690
by fib_lookup.
16911691
*/
16921692

1693+
skb_dst_drop(skb);
1694+
16931695
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
16941696
goto martian_source;
16951697

0 commit comments

Comments
 (0)