Skip to content

Commit 2ddf71e

Browse files
jrfastabdavem330
authored andcommitted
net: add notifier hooks for devmap bpf map
The BPF map devmap holds a refcnt on the net_device structure when it is in the map. We need to do this to ensure on driver unload we don't lose a dev reference. However, its not very convenient to have to manually unload the map when destroying a net device so add notifier handlers to do the cleanup automatically. But this creates a race between update/destroy BPF syscall and programs and the unregister netdev hook. Unfortunately, the best I could come up with is either to live with requiring manual removal of net devices from the map before removing the net device OR to add a mutex in devmap to ensure the map is not modified while we are removing a device. The fallout also requires that BPF programs no longer update/delete the map from the BPF program side because the mutex may sleep and this can not be done from inside an rcu critical section. This is not a real problem though because I have not come up with any use cases where this is actually useful in practice. If/when we come up with a compelling user for this we may need to revisit this. Signed-off-by: John Fastabend <[email protected]> Acked-by: Daniel Borkmann <[email protected]> Acked-by: Jesper Dangaard Brouer <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 11393cc commit 2ddf71e

File tree

3 files changed

+75
-2
lines changed

3 files changed

+75
-2
lines changed

include/linux/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
716716
* same cpu context. Further for best results no more than a single map
717717
* for the do_redirect/do_flush pair should be used. This limitation is
718718
* because we only track one map and force a flush when the map changes.
719-
* This does not appear to be a real limiation for existing software.
719+
* This does not appear to be a real limitation for existing software.
720720
*/
721721
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb);
722722
int xdp_do_redirect(struct net_device *dev,

kernel/bpf/devmap.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@
3434
* netdev_map consistent in this case. From the devmap side BPF programs
3535
* calling into these operations are the same as multiple user space threads
3636
* making system calls.
37+
*
38+
* Finally, any of the above may race with a netdev_unregister notifier. The
39+
* unregister notifier must search for net devices in the map structure that
40+
* contain a reference to the net device and remove them. This is a two step
41+
* process (a) dereference the bpf_dtab_netdev object in netdev_map and (b)
42+
* check to see if the ifindex is the same as the net_device being removed.
43+
* Unfortunately, the xchg() operations do not protect against this. To avoid
44+
* potentially removing incorrect objects the dev_map_list_mutex protects
45+
* conflicting netdev unregister and BPF syscall operations. Updates and
46+
* deletes from a BPF program (done in rcu critical section) are blocked
47+
* because of this mutex.
3748
*/
3849
#include <linux/bpf.h>
3950
#include <linux/jhash.h>
@@ -54,8 +65,12 @@ struct bpf_dtab {
5465
struct bpf_map map;
5566
struct bpf_dtab_netdev **netdev_map;
5667
unsigned long int __percpu *flush_needed;
68+
struct list_head list;
5769
};
5870

71+
static DEFINE_MUTEX(dev_map_list_mutex);
72+
static LIST_HEAD(dev_map_list);
73+
5974
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
6075
{
6176
struct bpf_dtab *dtab;
@@ -112,6 +127,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
112127
if (!dtab->netdev_map)
113128
goto free_dtab;
114129

130+
mutex_lock(&dev_map_list_mutex);
131+
list_add_tail(&dtab->list, &dev_map_list);
132+
mutex_unlock(&dev_map_list_mutex);
115133
return &dtab->map;
116134

117135
free_dtab:
@@ -146,6 +164,11 @@ static void dev_map_free(struct bpf_map *map)
146164
cpu_relax();
147165
}
148166

167+
/* Although we should no longer have datapath or bpf syscall operations
168+
* at this point we we can still race with netdev notifier, hence the
169+
* lock.
170+
*/
171+
mutex_lock(&dev_map_list_mutex);
149172
for (i = 0; i < dtab->map.max_entries; i++) {
150173
struct bpf_dtab_netdev *dev;
151174

@@ -160,6 +183,8 @@ static void dev_map_free(struct bpf_map *map)
160183
/* At this point bpf program is detached and all pending operations
161184
* _must_ be complete
162185
*/
186+
list_del(&dtab->list);
187+
mutex_unlock(&dev_map_list_mutex);
163188
free_percpu(dtab->flush_needed);
164189
bpf_map_area_free(dtab->netdev_map);
165190
kfree(dtab);
@@ -296,9 +321,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
296321
* the driver tear down ensures all soft irqs are complete before
297322
* removing the net device in the case of dev_put equals zero.
298323
*/
324+
mutex_lock(&dev_map_list_mutex);
299325
old_dev = xchg(&dtab->netdev_map[k], NULL);
300326
if (old_dev)
301327
call_rcu(&old_dev->rcu, __dev_map_entry_free);
328+
mutex_unlock(&dev_map_list_mutex);
302329
return 0;
303330
}
304331

@@ -341,9 +368,11 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
341368
* Remembering the driver side flush operation will happen before the
342369
* net device is removed.
343370
*/
371+
mutex_lock(&dev_map_list_mutex);
344372
old_dev = xchg(&dtab->netdev_map[i], dev);
345373
if (old_dev)
346374
call_rcu(&old_dev->rcu, __dev_map_entry_free);
375+
mutex_unlock(&dev_map_list_mutex);
347376

348377
return 0;
349378
}
@@ -356,3 +385,47 @@ const struct bpf_map_ops dev_map_ops = {
356385
.map_update_elem = dev_map_update_elem,
357386
.map_delete_elem = dev_map_delete_elem,
358387
};
388+
389+
static int dev_map_notification(struct notifier_block *notifier,
390+
ulong event, void *ptr)
391+
{
392+
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
393+
struct bpf_dtab *dtab;
394+
int i;
395+
396+
switch (event) {
397+
case NETDEV_UNREGISTER:
398+
mutex_lock(&dev_map_list_mutex);
399+
list_for_each_entry(dtab, &dev_map_list, list) {
400+
for (i = 0; i < dtab->map.max_entries; i++) {
401+
struct bpf_dtab_netdev *dev;
402+
403+
dev = dtab->netdev_map[i];
404+
if (!dev ||
405+
dev->dev->ifindex != netdev->ifindex)
406+
continue;
407+
dev = xchg(&dtab->netdev_map[i], NULL);
408+
if (dev)
409+
call_rcu(&dev->rcu,
410+
__dev_map_entry_free);
411+
}
412+
}
413+
mutex_unlock(&dev_map_list_mutex);
414+
break;
415+
default:
416+
break;
417+
}
418+
return NOTIFY_OK;
419+
}
420+
421+
static struct notifier_block dev_map_notifier = {
422+
.notifier_call = dev_map_notification,
423+
};
424+
425+
static int __init dev_map_init(void)
426+
{
427+
register_netdevice_notifier(&dev_map_notifier);
428+
return 0;
429+
}
430+
431+
subsys_initcall(dev_map_init);

kernel/bpf/verifier.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
12811281
* for now.
12821282
*/
12831283
case BPF_MAP_TYPE_DEVMAP:
1284-
if (func_id == BPF_FUNC_map_lookup_elem)
1284+
if (func_id != BPF_FUNC_redirect_map)
12851285
goto error;
12861286
break;
12871287
case BPF_MAP_TYPE_ARRAY_OF_MAPS:

0 commit comments

Comments
 (0)