Skip to content

Commit d349f99

Browse files
Cong Wangkuba-moo
authored andcommitted
net_sched: fix RTNL deadlock again caused by request_module()
tcf_action_init_1() loads tc action modules automatically with request_module() after parsing the tc action names, and it drops RTNL lock and re-holds it before and after request_module(). This causes a lot of troubles, as discovered by syzbot, because we can be in the middle of batch initializations when we create an array of tc actions. One of the problem is deadlock: CPU 0 CPU 1 rtnl_lock(); for (...) { tcf_action_init_1(); -> rtnl_unlock(); -> request_module(); rtnl_lock(); for (...) { tcf_action_init_1(); -> tcf_idr_check_alloc(); // Insert one action into idr, // but it is not committed until // tcf_idr_insert_many(), then drop // the RTNL lock in the _next_ // iteration -> rtnl_unlock(); -> rtnl_lock(); -> a_o->init(); -> tcf_idr_check_alloc(); // Now waiting for the same index // to be committed -> request_module(); -> rtnl_lock() // Now waiting for RTNL lock } rtnl_unlock(); } rtnl_unlock(); This is not easy to solve, we can move the request_module() before this loop and pre-load all the modules we need for this netlink message and then do the rest initializations. So the loop breaks down to two now: for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; a_o = tc_action_load_ops(name, tb[i]...); ops[i - 1] = a_o; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(ops[i - 1]...); } Although this looks serious, it only has been reported by syzbot, so it seems hard to trigger this by humans. And given the size of this patch, I'd suggest to make it to net-next and not to backport to stable. This patch has been tested by syzbot and tested with tdc.py by me. Fixes: 0fedc63 ("net_sched: commit action insertions together") Reported-and-tested-by: [email protected] Reported-and-tested-by: [email protected] Reported-by: [email protected] Cc: Jiri Pirko <[email protected]> Signed-off-by: Cong Wang <[email protected]> Tested-by: Jamal Hadi Salim <[email protected]> Acked-by: Jamal Hadi Salim <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 6ea9309 commit d349f99

File tree

3 files changed

+79
-41
lines changed

3 files changed

+79
-41
lines changed

include/net/act_api.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,13 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
186186
struct nlattr *est, char *name, int ovr, int bind,
187187
struct tc_action *actions[], size_t *attr_size,
188188
bool rtnl_held, struct netlink_ext_ack *extack);
189+
struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
190+
bool rtnl_held,
191+
struct netlink_ext_ack *extack);
189192
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
190193
struct nlattr *nla, struct nlattr *est,
191194
char *name, int ovr, int bind,
192-
bool rtnl_held,
195+
struct tc_action_ops *ops, bool rtnl_held,
193196
struct netlink_ext_ack *extack);
194197
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
195198
int ref, bool terse);

net/sched/act_api.c

Lines changed: 66 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -928,53 +928,35 @@ static void tcf_idr_insert_many(struct tc_action *actions[])
928928
}
929929
}
930930

931-
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
932-
struct nlattr *nla, struct nlattr *est,
933-
char *name, int ovr, int bind,
934-
bool rtnl_held,
935-
struct netlink_ext_ack *extack)
931+
struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
932+
bool rtnl_held,
933+
struct netlink_ext_ack *extack)
936934
{
937-
struct nla_bitfield32 flags = { 0, 0 };
938-
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
939-
struct tc_action *a;
935+
struct nlattr *tb[TCA_ACT_MAX + 1];
940936
struct tc_action_ops *a_o;
941-
struct tc_cookie *cookie = NULL;
942937
char act_name[IFNAMSIZ];
943-
struct nlattr *tb[TCA_ACT_MAX + 1];
944938
struct nlattr *kind;
945939
int err;
946940

947941
if (name == NULL) {
948942
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
949943
tcf_action_policy, extack);
950944
if (err < 0)
951-
goto err_out;
945+
return ERR_PTR(err);
952946
err = -EINVAL;
953947
kind = tb[TCA_ACT_KIND];
954948
if (!kind) {
955949
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
956-
goto err_out;
950+
return ERR_PTR(err);
957951
}
958952
if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
959953
NL_SET_ERR_MSG(extack, "TC action name too long");
960-
goto err_out;
961-
}
962-
if (tb[TCA_ACT_COOKIE]) {
963-
cookie = nla_memdup_cookie(tb);
964-
if (!cookie) {
965-
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
966-
err = -ENOMEM;
967-
goto err_out;
968-
}
954+
return ERR_PTR(err);
969955
}
970-
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
971-
if (tb[TCA_ACT_FLAGS])
972-
flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
973956
} else {
974957
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
975958
NL_SET_ERR_MSG(extack, "TC action name too long");
976-
err = -EINVAL;
977-
goto err_out;
959+
return ERR_PTR(-EINVAL);
978960
}
979961
}
980962

@@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
996978
* indicate this using -EAGAIN.
997979
*/
998980
if (a_o != NULL) {
999-
err = -EAGAIN;
1000-
goto err_mod;
981+
module_put(a_o->owner);
982+
return ERR_PTR(-EAGAIN);
1001983
}
1002984
#endif
1003985
NL_SET_ERR_MSG(extack, "Failed to load TC action module");
1004-
err = -ENOENT;
1005-
goto err_free;
986+
return ERR_PTR(-ENOENT);
1006987
}
1007988

989+
return a_o;
990+
}
991+
992+
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
993+
struct nlattr *nla, struct nlattr *est,
994+
char *name, int ovr, int bind,
995+
struct tc_action_ops *a_o, bool rtnl_held,
996+
struct netlink_ext_ack *extack)
997+
{
998+
struct nla_bitfield32 flags = { 0, 0 };
999+
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
1000+
struct nlattr *tb[TCA_ACT_MAX + 1];
1001+
struct tc_cookie *cookie = NULL;
1002+
struct tc_action *a;
1003+
int err;
1004+
10081005
/* backward compatibility for policer */
1009-
if (name == NULL)
1006+
if (name == NULL) {
1007+
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
1008+
tcf_action_policy, extack);
1009+
if (err < 0)
1010+
return ERR_PTR(err);
1011+
if (tb[TCA_ACT_COOKIE]) {
1012+
cookie = nla_memdup_cookie(tb);
1013+
if (!cookie) {
1014+
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
1015+
err = -ENOMEM;
1016+
goto err_out;
1017+
}
1018+
}
1019+
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
1020+
if (tb[TCA_ACT_FLAGS])
1021+
flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
1022+
10101023
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
10111024
rtnl_held, tp, flags.value, extack);
1012-
else
1025+
} else {
10131026
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
10141027
tp, flags.value, extack);
1028+
}
10151029
if (err < 0)
1016-
goto err_mod;
1030+
goto err_out;
10171031

10181032
if (!name && tb[TCA_ACT_COOKIE])
10191033
tcf_set_action_cookie(&a->act_cookie, cookie);
@@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
10301044

10311045
return a;
10321046

1033-
err_mod:
1034-
module_put(a_o->owner);
1035-
err_free:
1047+
err_out:
10361048
if (cookie) {
10371049
kfree(cookie->data);
10381050
kfree(cookie);
10391051
}
1040-
err_out:
10411052
return ERR_PTR(err);
10421053
}
10431054

@@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
10481059
struct tc_action *actions[], size_t *attr_size,
10491060
bool rtnl_held, struct netlink_ext_ack *extack)
10501061
{
1062+
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
10511063
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
10521064
struct tc_action *act;
10531065
size_t sz = 0;
@@ -1059,9 +1071,20 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
10591071
if (err < 0)
10601072
return err;
10611073

1074+
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
1075+
struct tc_action_ops *a_o;
1076+
1077+
a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
1078+
if (IS_ERR(a_o)) {
1079+
err = PTR_ERR(a_o);
1080+
goto err_mod;
1081+
}
1082+
ops[i - 1] = a_o;
1083+
}
1084+
10621085
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
10631086
act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
1064-
rtnl_held, extack);
1087+
ops[i - 1], rtnl_held, extack);
10651088
if (IS_ERR(act)) {
10661089
err = PTR_ERR(act);
10671090
goto err;
@@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
10811104

10821105
err:
10831106
tcf_action_destroy(actions, bind);
1107+
err_mod:
1108+
for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
1109+
if (ops[i])
1110+
module_put(ops[i]->owner);
1111+
}
10841112
return err;
10851113
}
10861114

net/sched/cls_api.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3043,12 +3043,19 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
30433043
size_t attr_size = 0;
30443044

30453045
if (exts->police && tb[exts->police]) {
3046+
struct tc_action_ops *a_o;
3047+
3048+
a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
3049+
if (IS_ERR(a_o))
3050+
return PTR_ERR(a_o);
30463051
act = tcf_action_init_1(net, tp, tb[exts->police],
30473052
rate_tlv, "police", ovr,
3048-
TCA_ACT_BIND, rtnl_held,
3053+
TCA_ACT_BIND, a_o, rtnl_held,
30493054
extack);
3050-
if (IS_ERR(act))
3055+
if (IS_ERR(act)) {
3056+
module_put(a_o->owner);
30513057
return PTR_ERR(act);
3058+
}
30523059

30533060
act->type = exts->type = TCA_OLD_COMPAT;
30543061
exts->actions[0] = act;

0 commit comments

Comments
 (0)