Skip to content

Commit a54e20b

Browse files
hadarhenzion authored and davem330 committed
net/mlx5e: Add basic TC tunnel set action for SRIOV offloads
In mlx5 HW, encapsulation is offloaded by having the steering rule hold an index into an encapsulation table that contains the entire set of headers to be added by the HW. The driver sets these headers in a buffer when offloading the action. The code maintains an mlx5_encap_entry for each encap header it has encountered when attempting to offload a TC tunnel set action. This entry maintains a linked list of all the flows sharing the same encap header; when the last flow is removed from the list, the encap entry is removed. The actual encap header is allocated by the driver in the hardware only if we have layer two neighbour info when the encap entry is created. While the flow is in the driver, the driver holds a reference on the neighbour. When a new flow with an encap action is inserted, the code first checks whether the required encap entry already exists according to the tunnel set parameters. If it does, the encap is shared; otherwise a new mlx5_encap_entry is created. The TC action parsing implementation in the driver assumes that the tunnel set action is provided in the same order set by the user, e.g., before the mirred_redirect action. Signed-off-by: Hadar Hen Zion <[email protected]> Signed-off-by: Saeed Mahameed <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 4a25730 commit a54e20b

File tree

4 files changed

+312
-7
lines changed

4 files changed

+312
-7
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

Lines changed: 288 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <net/tc_act/tc_mirred.h>
4242
#include <net/tc_act/tc_vlan.h>
4343
#include <net/tc_act/tc_tunnel_key.h>
44+
#include <net/vxlan.h>
4445
#include "en.h"
4546
#include "en_tc.h"
4647
#include "eswitch.h"
@@ -50,9 +51,15 @@ struct mlx5e_tc_flow {
5051
struct rhash_head node;
5152
u64 cookie;
5253
struct mlx5_flow_handle *rule;
54+
struct list_head encap; /* flows sharing the same encap */
5355
struct mlx5_esw_flow_attr *attr;
5456
};
5557

58+
/*
 * Encapsulation header types. The value is stored in an encap entry's
 * tunnel_type and handed to mlx5_encap_alloc() when the header is
 * allocated.
 */
enum {
	MLX5_HEADER_TYPE_VXLAN = 0x0,
	MLX5_HEADER_TYPE_NVGRE = 0x1,
};
62+
5663
#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
5764
#define MLX5E_TC_TABLE_NUM_GROUPS 4
5865

@@ -538,11 +545,243 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
538545
return 0;
539546
}
540547

548+
static inline int cmp_encap_info(struct mlx5_encap_info *a,
549+
struct mlx5_encap_info *b)
550+
{
551+
return memcmp(a, b, sizeof(*a));
552+
}
553+
554+
static inline int hash_encap_info(struct mlx5_encap_info *info)
555+
{
556+
return jhash(info, sizeof(*info), 0);
557+
}
558+
559+
static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
560+
struct net_device *mirred_dev,
561+
struct net_device **out_dev,
562+
struct flowi4 *fl4,
563+
struct neighbour **out_n,
564+
__be32 *saddr,
565+
int *out_ttl)
566+
{
567+
struct rtable *rt;
568+
struct neighbour *n = NULL;
569+
int ttl;
570+
571+
#if IS_ENABLED(CONFIG_INET)
572+
rt = ip_route_output_key(dev_net(mirred_dev), fl4);
573+
if (IS_ERR(rt)) {
574+
pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
575+
return -EOPNOTSUPP;
576+
}
577+
#else
578+
return -EOPNOTSUPP;
579+
#endif
580+
581+
if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
582+
pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
583+
__func__);
584+
ip_rt_put(rt);
585+
return -EOPNOTSUPP;
586+
}
587+
588+
ttl = ip4_dst_hoplimit(&rt->dst);
589+
n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
590+
ip_rt_put(rt);
591+
if (!n)
592+
return -ENOMEM;
593+
594+
*out_n = n;
595+
*saddr = fl4->saddr;
596+
*out_ttl = ttl;
597+
*out_dev = rt->dst.dev;
598+
599+
return 0;
600+
}
601+
602+
static int gen_vxlan_header_ipv4(struct net_device *out_dev,
603+
char buf[],
604+
unsigned char h_dest[ETH_ALEN],
605+
int ttl,
606+
__be32 daddr,
607+
__be32 saddr,
608+
__be16 udp_dst_port,
609+
__be32 vx_vni)
610+
{
611+
int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
612+
struct ethhdr *eth = (struct ethhdr *)buf;
613+
struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
614+
struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
615+
struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
616+
617+
memset(buf, 0, encap_size);
618+
619+
ether_addr_copy(eth->h_dest, h_dest);
620+
ether_addr_copy(eth->h_source, out_dev->dev_addr);
621+
eth->h_proto = htons(ETH_P_IP);
622+
623+
ip->daddr = daddr;
624+
ip->saddr = saddr;
625+
626+
ip->ttl = ttl;
627+
ip->protocol = IPPROTO_UDP;
628+
ip->version = 0x4;
629+
ip->ihl = 0x5;
630+
631+
udp->dest = udp_dst_port;
632+
vxh->vx_flags = VXLAN_HF_VNI;
633+
vxh->vx_vni = vxlan_vni_field(vx_vni);
634+
635+
return encap_size;
636+
}
637+
638+
static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
639+
struct net_device *mirred_dev,
640+
struct mlx5_encap_entry *e,
641+
struct net_device **out_dev)
642+
{
643+
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
644+
struct flowi4 fl4 = {};
645+
struct neighbour *n;
646+
char *encap_header;
647+
int encap_size;
648+
__be32 saddr;
649+
int ttl;
650+
int err;
651+
652+
encap_header = kzalloc(max_encap_size, GFP_KERNEL);
653+
if (!encap_header)
654+
return -ENOMEM;
655+
656+
switch (e->tunnel_type) {
657+
case MLX5_HEADER_TYPE_VXLAN:
658+
fl4.flowi4_proto = IPPROTO_UDP;
659+
fl4.fl4_dport = e->tun_info.tp_dst;
660+
break;
661+
default:
662+
err = -EOPNOTSUPP;
663+
goto out;
664+
}
665+
fl4.daddr = e->tun_info.daddr;
666+
667+
err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
668+
&fl4, &n, &saddr, &ttl);
669+
if (err)
670+
goto out;
671+
672+
e->n = n;
673+
e->out_dev = *out_dev;
674+
675+
if (!(n->nud_state & NUD_VALID)) {
676+
err = -ENOTSUPP;
677+
goto out;
678+
}
679+
680+
neigh_ha_snapshot(e->h_dest, n, *out_dev);
681+
682+
switch (e->tunnel_type) {
683+
case MLX5_HEADER_TYPE_VXLAN:
684+
encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
685+
e->h_dest, ttl,
686+
e->tun_info.daddr,
687+
saddr, e->tun_info.tp_dst,
688+
e->tun_info.tun_id);
689+
break;
690+
default:
691+
err = -EOPNOTSUPP;
692+
goto out;
693+
}
694+
695+
err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
696+
encap_size, encap_header, &e->encap_id);
697+
out:
698+
kfree(encap_header);
699+
return err;
700+
}
701+
702+
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
703+
struct ip_tunnel_info *tun_info,
704+
struct net_device *mirred_dev,
705+
struct mlx5_esw_flow_attr *attr)
706+
{
707+
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
708+
unsigned short family = ip_tunnel_info_af(tun_info);
709+
struct ip_tunnel_key *key = &tun_info->key;
710+
struct mlx5_encap_info info;
711+
struct mlx5_encap_entry *e;
712+
struct net_device *out_dev;
713+
uintptr_t hash_key;
714+
bool found = false;
715+
int tunnel_type;
716+
int err;
717+
718+
/* udp dst port must be given */
719+
if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
720+
return -EOPNOTSUPP;
721+
722+
if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
723+
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
724+
info.tp_dst = key->tp_dst;
725+
info.tun_id = tunnel_id_to_key32(key->tun_id);
726+
tunnel_type = MLX5_HEADER_TYPE_VXLAN;
727+
} else {
728+
return -EOPNOTSUPP;
729+
}
730+
731+
switch (family) {
732+
case AF_INET:
733+
info.daddr = key->u.ipv4.dst;
734+
break;
735+
default:
736+
return -EOPNOTSUPP;
737+
}
738+
739+
hash_key = hash_encap_info(&info);
740+
741+
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
742+
encap_hlist, hash_key) {
743+
if (!cmp_encap_info(&e->tun_info, &info)) {
744+
found = true;
745+
break;
746+
}
747+
}
748+
749+
if (found) {
750+
attr->encap = e;
751+
return 0;
752+
}
753+
754+
e = kzalloc(sizeof(*e), GFP_KERNEL);
755+
if (!e)
756+
return -ENOMEM;
757+
758+
e->tun_info = info;
759+
e->tunnel_type = tunnel_type;
760+
INIT_LIST_HEAD(&e->flows);
761+
762+
err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
763+
if (err)
764+
goto out_err;
765+
766+
attr->encap = e;
767+
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
768+
769+
return err;
770+
771+
out_err:
772+
kfree(e);
773+
return err;
774+
}
775+
541776
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
542-
struct mlx5_esw_flow_attr *attr)
777+
struct mlx5e_tc_flow *flow)
543778
{
779+
struct mlx5_esw_flow_attr *attr = flow->attr;
780+
struct ip_tunnel_info *info = NULL;
544781
const struct tc_action *a;
545782
LIST_HEAD(actions);
783+
bool encap = false;
784+
int err;
546785

547786
if (tc_no_actions(exts))
548787
return -EINVAL;
@@ -565,16 +804,37 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
565804

566805
out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
567806

568-
if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
807+
if (switchdev_port_same_parent_id(priv->netdev,
808+
out_dev)) {
809+
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
810+
MLX5_FLOW_CONTEXT_ACTION_COUNT;
811+
out_priv = netdev_priv(out_dev);
812+
attr->out_rep = out_priv->ppriv;
813+
} else if (encap) {
814+
err = mlx5e_attach_encap(priv, info,
815+
out_dev, attr);
816+
if (err)
817+
return err;
818+
list_add(&flow->encap, &attr->encap->flows);
819+
attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
820+
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
821+
MLX5_FLOW_CONTEXT_ACTION_COUNT;
822+
out_priv = netdev_priv(attr->encap->out_dev);
823+
attr->out_rep = out_priv->ppriv;
824+
} else {
569825
pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
570826
priv->netdev->name, out_dev->name);
571827
return -EINVAL;
572828
}
829+
continue;
830+
}
573831

574-
attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
575-
MLX5_FLOW_CONTEXT_ACTION_COUNT;
576-
out_priv = netdev_priv(out_dev);
577-
attr->out_rep = out_priv->ppriv;
832+
if (is_tcf_tunnel_set(a)) {
833+
info = tcf_tunnel_info(a);
834+
if (info)
835+
encap = true;
836+
else
837+
return -EOPNOTSUPP;
578838
continue;
579839
}
580840

@@ -644,7 +904,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
644904

645905
if (fdb_flow) {
646906
flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
647-
err = parse_tc_fdb_actions(priv, f->exts, flow->attr);
907+
err = parse_tc_fdb_actions(priv, f->exts, flow);
648908
if (err < 0)
649909
goto err_free;
650910
flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
@@ -681,6 +941,24 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
681941
return err;
682942
}
683943

944+
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
945+
struct mlx5e_tc_flow *flow) {
946+
struct list_head *next = flow->encap.next;
947+
948+
list_del(&flow->encap);
949+
if (list_empty(next)) {
950+
struct mlx5_encap_entry *e;
951+
952+
e = list_entry(next, struct mlx5_encap_entry, flows);
953+
if (e->n) {
954+
mlx5_encap_dealloc(priv->mdev, e->encap_id);
955+
neigh_release(e->n);
956+
}
957+
hlist_del_rcu(&e->encap_hlist);
958+
kfree(e);
959+
}
960+
}
961+
684962
int mlx5e_delete_flower(struct mlx5e_priv *priv,
685963
struct tc_cls_flower_offload *f)
686964
{
@@ -696,6 +974,9 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
696974

697975
mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
698976

977+
if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
978+
mlx5e_detach_encap(priv, flow);
979+
699980
kfree(flow);
700981

701982
return 0;

drivers/net/ethernet/mellanox/mlx5/core/eswitch.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
17821782
goto abort;
17831783
}
17841784

1785+
hash_init(esw->offloads.encap_tbl);
17851786
mutex_init(&esw->state_lock);
17861787

17871788
for (vport_num = 0; vport_num < total_vports; vport_num++) {

0 commit comments

Comments
 (0)