Skip to content

Commit 84c0d5e

Browse files
postgraphummakynes
authored and committed
ipvs: allow tunneling with gue encapsulation
IPIP packets are blocked in some public cloud environments. This patch allows GUE encapsulation with the tunneling method, which makes tunneling work in those environments. Signed-off-by: Jacky Hu <[email protected]> Acked-by: Julian Anastasov <[email protected]> Signed-off-by: Simon Horman <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 227e1e4 commit 84c0d5e

File tree

4 files changed

+130
-5
lines changed

4 files changed

+130
-5
lines changed

include/net/ip_vs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern {
600600

601601
/* Address family of addr */
602602
u16 af;
603+
604+
u16 tun_type; /* tunnel type */
605+
__be16 tun_port; /* tunnel port */
603606
};
604607

605608

@@ -660,6 +663,8 @@ struct ip_vs_dest {
660663
atomic_t conn_flags; /* flags to copy to conn */
661664
atomic_t weight; /* server weight */
662665
atomic_t last_weight; /* server latest weight */
666+
__u16 tun_type; /* tunnel type */
667+
__be16 tun_port; /* tunnel port */
663668

664669
refcount_t refcnt; /* reference counter */
665670
struct ip_vs_stats stats; /* statistics */

include/uapi/linux/ip_vs.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@
124124

125125
#define IP_VS_PEDATA_MAXLEN 255
126126

127+
/*
 * Tunnel types: selects the encapsulation used when forwarding to a
 * destination with the tunneling method. The value is carried in the
 * tun_type fields of struct ip_vs_dest_user_kern and struct ip_vs_dest
 * and is tested in the tunnel transmit and route/MTU code.
 */
128+
enum {
129+
IP_VS_CONN_F_TUNNEL_TYPE_IPIP = 0, /* IPIP; also the value ip_vs_copy_udest_compat() assigns */
130+
IP_VS_CONN_F_TUNNEL_TYPE_GUE, /* GUE: UDP + struct guehdr are added; add/edit reject tun_port == 0 */
131+
/* One past the last type above (value 2); presumably an upper bound,
 * no range check against it is visible in this diff - TODO confirm. */
IP_VS_CONN_F_TUNNEL_TYPE_MAX,
132+
};
133+
127134
/*
128135
* The struct ip_vs_service_user and struct ip_vs_dest_user are
129136
* used to set IPVS rules through setsockopt.
@@ -392,6 +399,10 @@ enum {
392399

393400
IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */
394401

402+
IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */
403+
404+
IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */
405+
395406
__IPVS_DEST_ATTR_MAX,
396407
};
397408

net/netfilter/ipvs/ip_vs_ctl.c

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
831831
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
832832
conn_flags |= IP_VS_CONN_F_INACTIVE;
833833

834+
/* set the tunnel info */
835+
dest->tun_type = udest->tun_type;
836+
dest->tun_port = udest->tun_port;
837+
834838
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
835839
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
836840
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
@@ -987,6 +991,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
987991
return -ERANGE;
988992
}
989993

994+
if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
995+
if (udest->tun_port == 0) {
996+
pr_err("%s(): tunnel port is zero\n", __func__);
997+
return -EINVAL;
998+
}
999+
}
1000+
9901001
ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
9911002

9921003
/* We use function that requires RCU lock */
@@ -1051,6 +1062,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
10511062
return -ERANGE;
10521063
}
10531064

1065+
if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1066+
if (udest->tun_port == 0) {
1067+
pr_err("%s(): tunnel port is zero\n", __func__);
1068+
return -EINVAL;
1069+
}
1070+
}
1071+
10541072
ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
10551073

10561074
/* We use function that requires RCU lock */
@@ -2333,6 +2351,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
23332351
udest->u_threshold = udest_compat->u_threshold;
23342352
udest->l_threshold = udest_compat->l_threshold;
23352353
udest->af = AF_INET;
2354+
udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
23362355
}
23372356

23382357
static int
@@ -2890,6 +2909,8 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
28902909
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
28912910
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
28922911
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
2912+
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
2913+
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
28932914
};
28942915

28952916
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3193,6 +3214,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
31933214
IP_VS_CONN_F_FWD_MASK)) ||
31943215
nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
31953216
atomic_read(&dest->weight)) ||
3217+
nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
3218+
dest->tun_type) ||
3219+
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
3220+
dest->tun_port) ||
31963221
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
31973222
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
31983223
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
@@ -3315,12 +3340,14 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
33153340
/* If a full entry was requested, check for the additional fields */
33163341
if (full_entry) {
33173342
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3318-
*nla_l_thresh;
3343+
*nla_l_thresh, *nla_tun_type, *nla_tun_port;
33193344

33203345
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
33213346
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
33223347
nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
33233348
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3349+
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
3350+
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
33243351

33253352
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
33263353
return -EINVAL;
@@ -3330,6 +3357,12 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
33303357
udest->weight = nla_get_u32(nla_weight);
33313358
udest->u_threshold = nla_get_u32(nla_u_thresh);
33323359
udest->l_threshold = nla_get_u32(nla_l_thresh);
3360+
3361+
if (nla_tun_type)
3362+
udest->tun_type = nla_get_u8(nla_tun_type);
3363+
3364+
if (nla_tun_port)
3365+
udest->tun_port = nla_get_be16(nla_tun_port);
33333366
}
33343367

33353368
return 0;

net/netfilter/ipvs/ip_vs_xmit.c

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <linux/slab.h>
3333
#include <linux/tcp.h> /* for tcphdr */
3434
#include <net/ip.h>
35+
#include <net/gue.h>
3536
#include <net/tcp.h> /* for csum_tcpudp_magic */
3637
#include <net/udp.h>
3738
#include <net/icmp.h> /* for icmp_send */
@@ -382,6 +383,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
382383
mtu = dst_mtu(&rt->dst);
383384
} else {
384385
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
386+
if (!dest)
387+
goto err_put;
388+
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
389+
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
385390
if (mtu < 68) {
386391
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
387392
goto err_put;
@@ -533,6 +538,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
533538
mtu = dst_mtu(&rt->dst);
534539
else {
535540
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
541+
if (!dest)
542+
goto err_put;
543+
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
544+
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
536545
if (mtu < IPV6_MIN_MTU) {
537546
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
538547
IPV6_MIN_MTU);
@@ -989,6 +998,41 @@ static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
989998
}
990999
}
9911000

1001+
/*
 * ipvs_gue_encap - prepend GUE and UDP headers to a packet being tunneled
 * @net:           network namespace, passed to udp_flow_src_port()
 * @skb:           packet; both headers are pushed in front of skb->data
 * @cp:            connection; cp->dest->tun_port supplies the UDP dest port
 * @next_protocol: in: protocol number stored in the GUE header;
 *                 out: overwritten with IPPROTO_UDP for the outer IP header
 *
 * Pushes sizeof(struct guehdr) and then sizeof(struct udphdr) onto @skb and
 * points the transport header at the new UDP header. No headroom check is
 * done here; the callers in this file add both header sizes to max_headroom
 * before preparing the skb.
 *
 * Always returns 0. The callers visible in this diff ignore the result.
 */
static int
1002+
ipvs_gue_encap(struct net *net, struct sk_buff *skb,
1003+
struct ip_vs_conn *cp, __u8 *next_protocol)
1004+
{
1005+
__be16 dport;
1006+
/* Source port is chosen before any header is pushed; the 0, 0 arguments
 * presumably request the default port range - confirm in net/udp.h. */
__be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
1007+
struct udphdr *udph; /* Our new UDP header */
1008+
struct guehdr *gueh; /* Our new GUE header */
1009+
1010+
/* GUE header goes first so that it ends up directly after the UDP header. */
skb_push(skb, sizeof(struct guehdr));
1011+
1012+
gueh = (struct guehdr *)skb->data;
1013+
1014+
/* All GUE fields are zeroed except the payload protocol. */
gueh->control = 0;
1015+
gueh->version = 0;
1016+
gueh->hlen = 0;
1017+
gueh->flags = 0;
1018+
gueh->proto_ctype = *next_protocol;
1019+
1020+
skb_push(skb, sizeof(struct udphdr));
1021+
/* skb->data now points at the UDP header, so udp_hdr() below finds it. */
skb_reset_transport_header(skb);
1022+
1023+
udph = udp_hdr(skb);
1024+
1025+
/* tun_port is declared __be16, so it is stored without conversion. */
dport = cp->dest->tun_port;
1026+
udph->dest = dport;
1027+
udph->source = sport;
1028+
/* skb->len at this point includes the UDP and GUE headers just pushed. */
udph->len = htons(skb->len);
1029+
/* NOTE(review): checksum is left at zero. This helper is also called
 * from ip_vs_tunnel_xmit_v6(); confirm zero UDP checksums are acceptable
 * for the IPv6 outer header case. */
udph->check = 0;
1030+
1031+
/* Tell the caller the outer IP header must now carry UDP. */
*next_protocol = IPPROTO_UDP;
1032+
1033+
return 0;
1034+
}
1035+
9921036
/*
9931037
* IP Tunneling transmitter
9941038
*
@@ -1025,6 +1069,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
10251069
struct iphdr *iph; /* Our new IP header */
10261070
unsigned int max_headroom; /* The extra header space needed */
10271071
int ret, local;
1072+
int tun_type, gso_type;
10281073

10291074
EnterFunction(10);
10301075

@@ -1046,6 +1091,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
10461091
*/
10471092
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
10481093

1094+
tun_type = cp->dest->tun_type;
1095+
1096+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1097+
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
1098+
10491099
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
10501100
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
10511101
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
@@ -1054,11 +1104,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
10541104
if (IS_ERR(skb))
10551105
goto tx_error;
10561106

1057-
if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
1107+
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
1108+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1109+
gso_type |= SKB_GSO_UDP_TUNNEL;
1110+
1111+
if (iptunnel_handle_offloads(skb, gso_type))
10581112
goto tx_error;
10591113

10601114
skb->transport_header = skb->network_header;
10611115

1116+
skb_set_inner_ipproto(skb, next_protocol);
1117+
1118+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1119+
ipvs_gue_encap(net, skb, cp, &next_protocol);
1120+
10621121
skb_push(skb, sizeof(struct iphdr));
10631122
skb_reset_network_header(skb);
10641123
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1102,6 +1161,8 @@ int
11021161
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
11031162
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
11041163
{
1164+
struct netns_ipvs *ipvs = cp->ipvs;
1165+
struct net *net = ipvs->net;
11051166
struct rt6_info *rt; /* Route to the other host */
11061167
struct in6_addr saddr; /* Source for tunnel */
11071168
struct net_device *tdev; /* Device to other host */
@@ -1112,10 +1173,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
11121173
struct ipv6hdr *iph; /* Our new IP header */
11131174
unsigned int max_headroom; /* The extra header space needed */
11141175
int ret, local;
1176+
int tun_type, gso_type;
11151177

11161178
EnterFunction(10);
11171179

1118-
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
1180+
local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
11191181
&cp->daddr.in6,
11201182
&saddr, ipvsh, 1,
11211183
IP_VS_RT_MODE_LOCAL |
@@ -1134,17 +1196,31 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
11341196
*/
11351197
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
11361198

1199+
tun_type = cp->dest->tun_type;
1200+
1201+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1202+
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
1203+
11371204
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
11381205
&next_protocol, &payload_len,
11391206
&dsfield, &ttl, NULL);
11401207
if (IS_ERR(skb))
11411208
goto tx_error;
11421209

1143-
if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
1210+
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
1211+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1212+
gso_type |= SKB_GSO_UDP_TUNNEL;
1213+
1214+
if (iptunnel_handle_offloads(skb, gso_type))
11441215
goto tx_error;
11451216

11461217
skb->transport_header = skb->network_header;
11471218

1219+
skb_set_inner_ipproto(skb, next_protocol);
1220+
1221+
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
1222+
ipvs_gue_encap(net, skb, cp, &next_protocol);
1223+
11481224
skb_push(skb, sizeof(struct ipv6hdr));
11491225
skb_reset_network_header(skb);
11501226
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1167,7 +1243,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
11671243

11681244
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
11691245
if (ret == NF_ACCEPT)
1170-
ip6_local_out(cp->ipvs->net, skb->sk, skb);
1246+
ip6_local_out(net, skb->sk, skb);
11711247
else if (ret == NF_DROP)
11721248
kfree_skb(skb);
11731249

0 commit comments

Comments
 (0)