Skip to content

Commit 81d947e

Browse files
borkmannpbholejrfastab
authored andcommitted
net, sched: fix panic when updating miniq {b,q}stats
While working on fixing another bug, I ran into the following panic on arm64 by simply attaching clsact qdisc, adding a filter and running traffic on ingress to it: [...] [ 178.188591] Unable to handle kernel read from unreadable memory at virtual address 810fb501f000 [ 178.197314] Mem abort info: [ 178.200121] ESR = 0x96000004 [ 178.203168] Exception class = DABT (current EL), IL = 32 bits [ 178.209095] SET = 0, FnV = 0 [ 178.212157] EA = 0, S1PTW = 0 [ 178.215288] Data abort info: [ 178.218175] ISV = 0, ISS = 0x00000004 [ 178.222019] CM = 0, WnR = 0 [ 178.224997] user pgtable: 4k pages, 48-bit VAs, pgd = 0000000023cb3f33 [ 178.231531] [0000810fb501f000] *pgd=0000000000000000 [ 178.236508] Internal error: Oops: 96000004 [#1] SMP [...] [ 178.311855] CPU: 73 PID: 2497 Comm: ping Tainted: G W 4.15.0-rc7+ #5 [ 178.319413] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017 [ 178.326887] pstate: 60400005 (nZCv daif +PAN -UAO) [ 178.331685] pc : __netif_receive_skb_core+0x49c/0xac8 [ 178.336728] lr : __netif_receive_skb+0x28/0x78 [ 178.341161] sp : ffff00002344b750 [ 178.344465] x29: ffff00002344b750 x28: ffff810fbdfd0580 [ 178.349769] x27: 0000000000000000 x26: ffff000009378000 [...] [ 178.418715] x1 : 0000000000000054 x0 : 0000000000000000 [ 178.424020] Process ping (pid: 2497, stack limit = 0x000000009f0a3ff4) [ 178.430537] Call trace: [ 178.432976] __netif_receive_skb_core+0x49c/0xac8 [ 178.437670] __netif_receive_skb+0x28/0x78 [ 178.441757] process_backlog+0x9c/0x160 [ 178.445584] net_rx_action+0x2f8/0x3f0 [...] Reason is that sch_ingress and sch_clsact are doing mini_qdisc_pair_init() which sets up miniq pointers to cpu_{b,q}stats from the underlying qdisc. Problem is that this cannot work since they are actually set up right after the qdisc ->init() callback in qdisc_create(), so first packet going into sch_handle_ingress() tries to call mini_qdisc_bstats_cpu_update() and we therefore panic. In order to fix this, allocation of {b,q}stats needs to happen before we call into ->init(). In net-next, there's already such option through commit d59f5ff ("net: sched: a dflt qdisc may be used with per cpu stats"). However, the bug needs to be fixed in net still for 4.15. Thus, include these bits to reduce any merge churn and reuse the static_flags field to set TCQ_F_CPUSTATS, and remove the allocation from qdisc_create() since there is no other user left. Prashant Bhole ran into the same issue but for net-next, thus adding him below as well as co-author. Same issue was also reported by Sandipan Das when using bcc. Fixes: 4620940 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath") Reference: https://lists.iovisor.org/pipermail/iovisor-dev/2018-January/001190.html Reported-by: Sandipan Das <[email protected]> Co-authored-by: Prashant Bhole <[email protected]> Co-authored-by: John Fastabend <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Cc: Jiri Pirko <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 70eeff6 commit 81d947e

File tree

4 files changed

+24
-30
lines changed

4 files changed

+24
-30
lines changed

include/net/sch_generic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ struct Qdisc_ops {
179179
const struct Qdisc_class_ops *cl_ops;
180180
char id[IFNAMSIZ];
181181
int priv_size;
182+
unsigned int static_flags;
182183

183184
int (*enqueue)(struct sk_buff *skb,
184185
struct Qdisc *sch,
@@ -444,6 +445,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
444445
unsigned int len);
445446
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
446447
const struct Qdisc_ops *ops);
448+
void qdisc_free(struct Qdisc *qdisc);
447449
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
448450
const struct Qdisc_ops *ops, u32 parentid);
449451
void __qdisc_calculate_pkt_len(struct sk_buff *skb,

net/sched/sch_api.c

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
10631063
}
10641064

10651065
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
1066-
if (qdisc_is_percpu_stats(sch)) {
1067-
sch->cpu_bstats =
1068-
netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1069-
if (!sch->cpu_bstats)
1070-
goto err_out4;
1071-
1072-
sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1073-
if (!sch->cpu_qstats)
1074-
goto err_out4;
1075-
}
1076-
10771066
if (tca[TCA_STAB]) {
10781067
stab = qdisc_get_stab(tca[TCA_STAB]);
10791068
if (IS_ERR(stab)) {
@@ -1115,16 +1104,14 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
11151104
ops->destroy(sch);
11161105
err_out3:
11171106
dev_put(dev);
1118-
kfree((char *) sch - sch->padded);
1107+
qdisc_free(sch);
11191108
err_out2:
11201109
module_put(ops->owner);
11211110
err_out:
11221111
*errp = err;
11231112
return NULL;
11241113

11251114
err_out4:
1126-
free_percpu(sch->cpu_bstats);
1127-
free_percpu(sch->cpu_qstats);
11281115
/*
11291116
* Any broken qdiscs that would require a ops->reset() here?
11301117
* The qdisc was never in action so it shouldn't be necessary.

net/sched/sch_generic.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
633633
qdisc_skb_head_init(&sch->q);
634634
spin_lock_init(&sch->q.lock);
635635

636+
if (ops->static_flags & TCQ_F_CPUSTATS) {
637+
sch->cpu_bstats =
638+
netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
639+
if (!sch->cpu_bstats)
640+
goto errout1;
641+
642+
sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
643+
if (!sch->cpu_qstats) {
644+
free_percpu(sch->cpu_bstats);
645+
goto errout1;
646+
}
647+
}
648+
636649
spin_lock_init(&sch->busylock);
637650
lockdep_set_class(&sch->busylock,
638651
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -642,13 +655,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
642655
dev->qdisc_running_key ?: &qdisc_running_key);
643656

644657
sch->ops = ops;
658+
sch->flags = ops->static_flags;
645659
sch->enqueue = ops->enqueue;
646660
sch->dequeue = ops->dequeue;
647661
sch->dev_queue = dev_queue;
648662
dev_hold(dev);
649663
refcount_set(&sch->refcnt, 1);
650664

651665
return sch;
666+
errout1:
667+
kfree(p);
652668
errout:
653669
return ERR_PTR(err);
654670
}
@@ -698,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc)
698714
}
699715
EXPORT_SYMBOL(qdisc_reset);
700716

701-
static void qdisc_free(struct Qdisc *qdisc)
717+
void qdisc_free(struct Qdisc *qdisc)
702718
{
703719
if (qdisc_is_percpu_stats(qdisc)) {
704720
free_percpu(qdisc->cpu_bstats);

net/sched/sch_ingress.c

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
6666
{
6767
struct ingress_sched_data *q = qdisc_priv(sch);
6868
struct net_device *dev = qdisc_dev(sch);
69-
int err;
7069

7170
net_inc_ingress_queue();
7271

@@ -76,13 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
7675
q->block_info.chain_head_change = clsact_chain_head_change;
7776
q->block_info.chain_head_change_priv = &q->miniqp;
7877

79-
err = tcf_block_get_ext(&q->block, sch, &q->block_info);
80-
if (err)
81-
return err;
82-
83-
sch->flags |= TCQ_F_CPUSTATS;
84-
85-
return 0;
78+
return tcf_block_get_ext(&q->block, sch, &q->block_info);
8679
}
8780

8881
static void ingress_destroy(struct Qdisc *sch)
@@ -121,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
121114
.cl_ops = &ingress_class_ops,
122115
.id = "ingress",
123116
.priv_size = sizeof(struct ingress_sched_data),
117+
.static_flags = TCQ_F_CPUSTATS,
124118
.init = ingress_init,
125119
.destroy = ingress_destroy,
126120
.dump = ingress_dump,
@@ -192,13 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
192186
q->egress_block_info.chain_head_change = clsact_chain_head_change;
193187
q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
194188

195-
err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
196-
if (err)
197-
return err;
198-
199-
sch->flags |= TCQ_F_CPUSTATS;
200-
201-
return 0;
189+
return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
202190
}
203191

204192
static void clsact_destroy(struct Qdisc *sch)
@@ -225,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
225213
.cl_ops = &clsact_class_ops,
226214
.id = "clsact",
227215
.priv_size = sizeof(struct clsact_sched_data),
216+
.static_flags = TCQ_F_CPUSTATS,
228217
.init = clsact_init,
229218
.destroy = clsact_destroy,
230219
.dump = ingress_dump,

0 commit comments

Comments
 (0)