Skip to content

Commit 0910c99

Browse files
Yuval Shaia, Mukesh Kacker
authored and committed
IB/ipoib: CSUM support in connected mode
This enhancement suggests using the IB CRC instead of CSUM in IPoIB CM. IPoIB CM uses RC (Reliable Connection), which guarantees corruption-free delivery of the packet. InfiniBand uses a 32-bit CRC, which provides stronger data integrity protection compared to the 16-bit IP checksum. So there is no added value that the IP/TCP checksum provides in the IB world. The proposal is to tell the network stack that IPoIB-CM supports IP checksum offload. This enables the kernel to save the time of checksum calculation for IPoIB CM packets. The network stack sends the IP packet without adding the IP checksum to the header. On the receive side, the IPoIB driver again tells the network stack that the IP checksum is good for the incoming packets, and the network stack avoids the IP checksum calculations. During connection establishment the driver determines whether the peer supports IB CRC as checksum. This is done so the driver will be able to calculate the checksum before transmitting the packet in case the peer does not support this feature. Orabug: 20559068 Tested-Acked-by: Santosh Shilimkar <[email protected]> Reviewed-by: Mukesh Kacker <[email protected]> Reviewed-by: Ajaykumar Hotchandani <[email protected]> Signed-off-by: Yuval Shaia <[email protected]>
1 parent ad2c457 commit 0910c99

File tree

3 files changed

+89
-3
lines changed

3 files changed

+89
-3
lines changed

drivers/infiniband/ulp/ipoib/ipoib.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ enum {
9292
IPOIB_FLAG_UMCAST = 10,
9393
IPOIB_STOP_NEIGH_GC = 11,
9494
IPOIB_NEIGH_TBL_FLUSH = 12,
95+
IPOIB_FLAG_CSUM = 15,
9596

9697
IPOIB_MAX_BACKOFF_SECONDS = 16,
9798

@@ -185,9 +186,20 @@ struct ipoib_pmtu_update {
185186

186187
struct ib_cm_id;
187188

189+
/* Signature so driver can make sure ipoib_cm_data.caps is valid */
190+
#define IPOIB_CM_PROTO_SIG 0x2211
191+
/* Current driver ipoib_cm_data version */
192+
#define IPOIB_CM_PROTO_VER (1UL << 12)
193+
194+
/*
 * Capability flags exchanged between CM peers in ipoib_cm_data.caps
 * (the 12 capability bits; the remaining 4 bits carry the protocol version).
 *
 * IPOIB_CM_CAPS_IBCRC_AS_CSUM is advertised by the REQ/REP senders when the
 * cm_ibcrc_as_csum module parameter is non-zero and IPOIB_FLAG_CSUM is set
 * in priv->flags.
 */
enum ipoib_cm_data_caps {
195+
IPOIB_CM_CAPS_IBCRC_AS_CSUM = 1UL << 0,
196+
};
197+
188198
/*
 * Private data attached to IPoIB connected-mode CM REQ and REP messages.
 * All fields are big-endian on the wire. sig and caps were appended after
 * qpn and mtu; a receiver only trusts caps when sig matches
 * IPOIB_CM_PROTO_SIG and the version bits of caps match IPOIB_CM_PROTO_VER.
 */
struct ipoib_cm_data {
189199
__be32 qpn; /* High byte MUST be ignored on receive */
190200
__be32 mtu; /* senders fill this with IPOIB_CM_BUF_SIZE */
201+
__be16 sig; /* must be IPOIB_CM_PROTO_SIG */
202+
__be16 caps; /* 4 bits proto ver and 12 bits capabilities */
191203
};
192204

193205
/*
@@ -232,6 +244,7 @@ struct ipoib_cm_rx {
232244
unsigned long jiffies;
233245
enum ipoib_cm_state state;
234246
int recv_count;
247+
u16 caps;
235248
};
236249

237250
struct ipoib_cm_tx {
@@ -246,6 +259,7 @@ struct ipoib_cm_tx {
246259
unsigned tx_tail;
247260
unsigned long flags;
248261
u32 mtu;
262+
u16 caps;
249263
};
250264

251265
struct ipoib_cm_rx_buf {
@@ -454,8 +468,20 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
454468

455469
extern struct workqueue_struct *ipoib_workqueue;
456470

471+
extern int cm_ibcrc_as_csum;
472+
457473
/* functions */
458474

475+
/*
 * Check the signature field received in ipoib_cm_data.
 *
 * proto_sig: signature value; callers convert it with be16_to_cpu() first.
 * Returns non-zero when it equals IPOIB_CM_PROTO_SIG, zero otherwise.
 */
static inline int ipoib_cm_check_proto_sig(u16 proto_sig)
476+
{
477+
return (proto_sig == IPOIB_CM_PROTO_SIG);
478+
}
479+
480+
/*
 * Check the protocol version encoded in a received caps word.
 *
 * caps: ipoib_cm_data.caps; callers convert it with be16_to_cpu() first.
 * Returns non-zero when the version bits equal IPOIB_CM_PROTO_VER,
 * zero otherwise. The capability bits are ignored by this check.
 */
static inline int ipoib_cm_check_proto_ver(u16 caps)
481+
{
482+
/* 0xF000 selects the upper 4 bits, which hold the protocol version */
return ((caps & 0xF000) == IPOIB_CM_PROTO_VER);
483+
}
484+
459485
int ipoib_poll(struct napi_struct *napi, int budget);
460486
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
461487
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);

drivers/infiniband/ulp/ipoib/ipoib_cm.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,9 +448,16 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
448448
struct ipoib_dev_priv *priv = netdev_priv(dev);
449449
struct ipoib_cm_data data = {};
450450
struct ib_cm_rep_param rep = {};
451+
u16 caps = 0;
452+
453+
caps |= IPOIB_CM_PROTO_VER;
454+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
455+
caps |= IPOIB_CM_CAPS_IBCRC_AS_CSUM;
451456

452457
data.qpn = cpu_to_be32(priv->qp->qp_num);
453458
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
459+
data.sig = cpu_to_be16(IPOIB_CM_PROTO_SIG);
460+
data.caps = cpu_to_be16(caps);
454461

455462
rep.private_data = &data;
456463
rep.private_data_len = sizeof data;
@@ -469,6 +476,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
469476
struct ipoib_cm_rx *p;
470477
unsigned psn;
471478
int ret;
479+
struct ipoib_cm_data *cm_data;
472480

473481
ipoib_dbg(priv, "REQ arrived\n");
474482
p = kzalloc(sizeof *p, GFP_KERNEL);
@@ -487,6 +495,13 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
487495
goto err_qp;
488496
}
489497

498+
cm_data = (struct ipoib_cm_data *)event->private_data;
499+
ipoib_dbg(priv, "Otherend sig=0x%x\n", be16_to_cpu(cm_data->sig));
500+
if (ipoib_cm_check_proto_sig(be16_to_cpu(cm_data->sig)) &&
501+
ipoib_cm_check_proto_ver(be16_to_cpu(cm_data->caps)))
502+
p->caps = be16_to_cpu(cm_data->caps);
503+
ipoib_dbg(priv, "Otherend caps=0x%x\n", p->caps);
504+
490505
psn = prandom_u32() & 0xffffff;
491506
ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
492507
if (ret)
@@ -697,6 +712,10 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
697712
skb->dev = dev;
698713
/* XXX get correct PACKET_ type here */
699714
skb->pkt_type = PACKET_HOST;
715+
716+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
717+
skb->ip_summed = CHECKSUM_UNNECESSARY;
718+
700719
netif_receive_skb(skb);
701720

702721
repost:
@@ -789,6 +808,18 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
789808
tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
790809
tx_req->skb = skb;
791810

811+
/* Calculate checksum if we support ibcrc_as_csum but peer is not */
812+
if ((skb->ip_summed == CHECKSUM_PARTIAL) && cm_ibcrc_as_csum &&
813+
test_bit(IPOIB_FLAG_CSUM, &priv->flags) &&
814+
!(tx->caps & IPOIB_CM_CAPS_IBCRC_AS_CSUM)) {
815+
if (skb_checksum_help(skb)) {
816+
ipoib_warn(priv, "Fail to csum skb\n");
817+
++dev->stats.tx_errors;
818+
dev_kfree_skb_any(skb);
819+
return;
820+
}
821+
}
822+
792823
if (skb_shinfo(skb)->nr_frags) {
793824
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
794825
++dev->stats.tx_errors;
@@ -1032,6 +1063,7 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
10321063
struct ib_qp_attr qp_attr;
10331064
int qp_attr_mask, ret;
10341065
struct sk_buff *skb;
1066+
struct ipoib_cm_data *cm_data;
10351067

10361068
p->mtu = be32_to_cpu(data->mtu);
10371069

@@ -1041,6 +1073,13 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
10411073
return -EINVAL;
10421074
}
10431075

1076+
cm_data = (struct ipoib_cm_data *)event->private_data;
1077+
ipoib_dbg(priv, "Otherend sig=0x%x\n", be16_to_cpu(cm_data->sig));
1078+
if (ipoib_cm_check_proto_sig(be16_to_cpu(cm_data->sig)) &&
1079+
ipoib_cm_check_proto_ver(be16_to_cpu(cm_data->caps)))
1080+
p->caps = be16_to_cpu(cm_data->caps);
1081+
ipoib_dbg(priv, "Otherend caps=0x%x\n", p->caps);
1082+
10441083
qp_attr.qp_state = IB_QPS_RTR;
10451084
ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
10461085
if (ret) {
@@ -1133,9 +1172,16 @@ static int ipoib_cm_send_req(struct net_device *dev,
11331172
struct ipoib_dev_priv *priv = netdev_priv(dev);
11341173
struct ipoib_cm_data data = {};
11351174
struct ib_cm_req_param req = {};
1175+
u16 caps = 0;
1176+
1177+
caps |= IPOIB_CM_PROTO_VER;
1178+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
1179+
caps |= IPOIB_CM_CAPS_IBCRC_AS_CSUM;
11361180

11371181
data.qpn = cpu_to_be32(priv->qp->qp_num);
11381182
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
1183+
data.sig = cpu_to_be16(IPOIB_CM_PROTO_SIG);
1184+
data.caps = cpu_to_be16(caps);
11391185

11401186
req.primary_path = pathrec;
11411187
req.alternate_path = NULL;

drivers/infiniband/ulp/ipoib/ipoib_main.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644);
7474
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
7575
#endif
7676

77+
/*
 * Module parameter: when non-zero (the default), connected mode treats the
 * IB CRC as the checksum on devices that have IPOIB_FLAG_CSUM set.
 * Registered with permissions 0444. Declared extern in ipoib.h so that
 * ipoib_cm.c can read it.
 */
int cm_ibcrc_as_csum = 1;
78+
module_param_named(cm_ibcrc_as_csum, cm_ibcrc_as_csum, int, 0444);
79+
MODULE_PARM_DESC(cm_ibcrc_as_csum,
80+
"Indicates whether to utilize IB-CRC as CSUM in connected mode,(default: 1)");
81+
7782
struct ipoib_path_iter {
7883
struct net_device *dev;
7984
struct ipoib_path path;
@@ -197,8 +202,12 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
197202
{
198203
struct ipoib_dev_priv *priv = netdev_priv(dev);
199204

200-
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
201-
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
205+
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) {
206+
features &= ~NETIF_F_TSO;
207+
if (!(cm_ibcrc_as_csum && (test_bit(IPOIB_FLAG_CSUM,
208+
&priv->flags))))
209+
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
210+
}
202211

203212
return features;
204213
}
@@ -254,7 +263,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
254263
"will cause multicast packet drops\n");
255264
netdev_update_features(dev);
256265
rtnl_unlock();
257-
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
266+
if (cm_ibcrc_as_csum && (test_bit(IPOIB_FLAG_CSUM,
267+
&priv->flags)))
268+
priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
269+
else
270+
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
258271

259272
ipoib_flush_paths(dev);
260273

@@ -1738,6 +1751,7 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
17381751
kfree(device_attr);
17391752

17401753
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1754+
set_bit(IPOIB_FLAG_CSUM, &priv->flags);
17411755
priv->dev->hw_features = NETIF_F_SG |
17421756
NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
17431757

0 commit comments

Comments
 (0)