Skip to content

Commit 7e79699

Browse files
Yuval Shaia authored and jfvogel committed
IB/ipoib: CSUM support in connected mode
This enhancement suggests using the IB CRC instead of CSUM in IPoIB CM. IPoIB CM uses RC (Reliable Connection), which guarantees corruption-free delivery of the packet. InfiniBand uses a 32b CRC, which provides stronger data integrity protection compared to the 16b IP L4 Checksum. So, there is no added value that the IP/TCP/UDP Checksum provides in the IB world. The proposal is to tell the network stack that IPoIB-CM supports IP Checksum offload. This enables the kernel to save the time of checksum calculation for IPoIB CM packets. The network stack sends the IP packet without adding the IP Checksum to the header. On the receive side, the IPoIB driver again tells the network stack that the IP Checksum is good for the incoming packets, and the network stack avoids the IP Checksum calculations. During connection establishment the driver determines whether the peer supports IB CRC as checksum. This is done so that the driver will be able to calculate the checksum before transmitting the packet in case the peer does not support this feature. Orabug: 20559068 Tested-Acked-by: Santosh Shilimkar <[email protected]> Reviewed-by: Mukesh Kacker <[email protected]> Reviewed-by: Ajaykumar Hotchandani <[email protected]> Signed-off-by: Yuval Shaia <[email protected]> Orabug: 27487353 (cherry picked from commit 0910c99) cherry-pick-repo=linux-uek.git Conflicts: drivers/infiniband/ulp/ipoib/ipoib.h drivers/infiniband/ulp/ipoib/ipoib_cm.c drivers/infiniband/ulp/ipoib/ipoib_main.c Signed-off-by: Yuval Shaia <[email protected]> Reviewed-by: Håkon Bugge <[email protected]> Signed-off-by: Aron Silverton <[email protected]> Signed-off-by: Somasundaram Krishnasamy <[email protected]> Orabug: 30833821 UEK5 => UEK6 (cherry picked from commit c06448b) cherry-pick-repo=UEK/production/linux-uek.git Conflicts: drivers/infiniband/ulp/ipoib/ipoib.h drivers/infiniband/ulp/ipoib/ipoib_cm.c drivers/infiniband/ulp/ipoib/ipoib_main.c Conflicts as a result of changes made in upstream code. 
Signed-off-by: Yuval Shaia <[email protected]> Signed-off-by: Qing Huang <[email protected]> Reviewed-by: Sudhakar Dindukurti <[email protected]> Signed-off-by: Aron Silverton <[email protected]>
1 parent 363dada commit 7e79699

File tree

3 files changed

+130
-0
lines changed

3 files changed

+130
-0
lines changed

drivers/infiniband/ulp/ipoib/ipoib.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ enum {
9494
IPOIB_NEIGH_TBL_FLUSH = 12,
9595
IPOIB_FLAG_DEV_ADDR_SET = 13,
9696
IPOIB_FLAG_DEV_ADDR_CTRL = 14,
97+
#ifndef WITHOUT_ORACLE_EXTENSIONS
98+
IPOIB_FLAG_CSUM = 17,
99+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
97100

98101
IPOIB_MAX_BACKOFF_SECONDS = 16,
99102

@@ -193,9 +196,24 @@ struct ipoib_tx_buf {
193196

194197
struct ib_cm_id;
195198

199+
#ifndef WITHOUT_ORACLE_EXTENSIONS
200+
/* Signature so driver can make sure ipoib_cm_data.caps is valid */
201+
#define IPOIB_CM_PROTO_SIG 0x2211
202+
/* Current driver ipoib_cm_data version */
203+
#define IPOIB_CM_PROTO_VER (1UL << 12)
204+
205+
enum ipoib_cm_data_caps {
206+
IPOIB_CM_CAPS_IBCRC_AS_CSUM = 1UL << 0,
207+
};
208+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
209+
196210
struct ipoib_cm_data {
197211
__be32 qpn; /* High byte MUST be ignored on receive */
198212
__be32 mtu;
213+
#ifndef WITHOUT_ORACLE_EXTENSIONS
214+
__be16 sig; /* must be IPOIB_CM_PROTO_SIG */
215+
__be16 caps; /* 4 bits proto ver and 12 bits capabilities */
216+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
199217
};
200218

201219
/*
@@ -240,6 +258,9 @@ struct ipoib_cm_rx {
240258
unsigned long jiffies;
241259
enum ipoib_cm_state state;
242260
int recv_count;
261+
#ifndef WITHOUT_ORACLE_EXTENSIONS
262+
u16 caps;
263+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
243264
};
244265

245266
struct ipoib_cm_tx {
@@ -254,6 +275,9 @@ struct ipoib_cm_tx {
254275
unsigned long flags;
255276
u32 mtu;
256277
unsigned int max_send_sge;
278+
#ifndef WITHOUT_ORACLE_EXTENSIONS
279+
u16 caps;
280+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
257281
};
258282

259283
struct ipoib_cm_rx_buf {
@@ -476,8 +500,24 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
476500

477501
extern struct workqueue_struct *ipoib_workqueue;
478502

503+
#ifndef WITHOUT_ORACLE_EXTENSIONS
504+
extern int cm_ibcrc_as_csum;
505+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
506+
479507
/* functions */
480508

509+
#ifndef WITHOUT_ORACLE_EXTENSIONS
510+
static inline int ipoib_cm_check_proto_sig(u16 proto_sig)
511+
{
512+
return (proto_sig == IPOIB_CM_PROTO_SIG);
513+
}
514+
515+
static inline int ipoib_cm_check_proto_ver(u16 caps)
516+
{
517+
return ((caps & 0xF000) == IPOIB_CM_PROTO_VER);
518+
}
519+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
520+
481521
int ipoib_rx_poll(struct napi_struct *napi, int budget);
482522
int ipoib_tx_poll(struct napi_struct *napi, int budget);
483523
void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr);

drivers/infiniband/ulp/ipoib/ipoib_cm.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,9 +425,19 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
425425
struct ipoib_dev_priv *priv = ipoib_priv(dev);
426426
struct ipoib_cm_data data = {};
427427
struct ib_cm_rep_param rep = {};
428+
#ifndef WITHOUT_ORACLE_EXTENSIONS
429+
u16 caps = IPOIB_CM_PROTO_VER;
430+
431+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
432+
caps |= IPOIB_CM_CAPS_IBCRC_AS_CSUM;
433+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
428434

429435
data.qpn = cpu_to_be32(priv->qp->qp_num);
430436
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
437+
#ifndef WITHOUT_ORACLE_EXTENSIONS
438+
data.sig = cpu_to_be16(IPOIB_CM_PROTO_SIG);
439+
data.caps = cpu_to_be16(caps);
440+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
431441

432442
rep.private_data = &data;
433443
rep.private_data_len = sizeof(data);
@@ -447,6 +457,9 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
447457
struct ipoib_cm_rx *p;
448458
unsigned int psn;
449459
int ret;
460+
#ifndef WITHOUT_ORACLE_EXTENSIONS
461+
struct ipoib_cm_data *cm_data;
462+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
450463

451464
ipoib_dbg(priv, "REQ arrived\n");
452465
p = kzalloc(sizeof(*p), GFP_KERNEL);
@@ -465,6 +478,15 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
465478
goto err_qp;
466479
}
467480

481+
#ifndef WITHOUT_ORACLE_EXTENSIONS
482+
cm_data = (struct ipoib_cm_data *)event->private_data;
483+
ipoib_dbg(priv, "Otherend sig=0x%x\n", be16_to_cpu(cm_data->sig));
484+
if (ipoib_cm_check_proto_sig(be16_to_cpu(cm_data->sig)) &&
485+
ipoib_cm_check_proto_ver(be16_to_cpu(cm_data->caps)))
486+
p->caps = be16_to_cpu(cm_data->caps);
487+
ipoib_dbg(priv, "Otherend caps=0x%x\n", p->caps);
488+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
489+
468490
psn = prandom_u32() & 0xffffff;
469491
ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
470492
if (ret)
@@ -674,6 +696,12 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
674696
skb->dev = dev;
675697
/* XXX get correct PACKET_ type here */
676698
skb->pkt_type = PACKET_HOST;
699+
700+
#ifndef WITHOUT_ORACLE_EXTENSIONS
701+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
702+
skb->ip_summed = CHECKSUM_UNNECESSARY;
703+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
704+
677705
netif_receive_skb(skb);
678706

679707
repost:
@@ -763,6 +791,20 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
763791
netif_stop_queue(dev);
764792
}
765793

794+
#ifndef WITHOUT_ORACLE_EXTENSIONS
795+
/* Calculate checksum if we support ibcrc_as_csum but the peer does not */
796+
if ((skb->ip_summed == CHECKSUM_PARTIAL) && cm_ibcrc_as_csum &&
797+
test_bit(IPOIB_FLAG_CSUM, &priv->flags) &&
798+
!(tx->caps & IPOIB_CM_CAPS_IBCRC_AS_CSUM)) {
799+
if (skb_checksum_help(skb)) {
800+
ipoib_warn(priv, "Fail to csum skb\n");
801+
++dev->stats.tx_errors;
802+
dev_kfree_skb_any(skb);
803+
return;
804+
}
805+
}
806+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
807+
766808
skb_orphan(skb);
767809
skb_dst_drop(skb);
768810

@@ -992,6 +1034,9 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
9921034
struct ib_qp_attr qp_attr;
9931035
int qp_attr_mask, ret;
9941036
struct sk_buff *skb;
1037+
#ifndef WITHOUT_ORACLE_EXTENSIONS
1038+
struct ipoib_cm_data *cm_data;
1039+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
9951040

9961041
p->mtu = be32_to_cpu(data->mtu);
9971042

@@ -1001,6 +1046,15 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
10011046
return -EINVAL;
10021047
}
10031048

1049+
#ifndef WITHOUT_ORACLE_EXTENSIONS
1050+
cm_data = (struct ipoib_cm_data *)event->private_data;
1051+
ipoib_dbg(priv, "Otherend sig=0x%x\n", be16_to_cpu(cm_data->sig));
1052+
if (ipoib_cm_check_proto_sig(be16_to_cpu(cm_data->sig)) &&
1053+
ipoib_cm_check_proto_ver(be16_to_cpu(cm_data->caps)))
1054+
p->caps = be16_to_cpu(cm_data->caps);
1055+
ipoib_dbg(priv, "Otherend caps=0x%x\n", p->caps);
1056+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
1057+
10041058
qp_attr.qp_state = IB_QPS_RTR;
10051059
ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
10061060
if (ret) {
@@ -1087,9 +1141,19 @@ static int ipoib_cm_send_req(struct net_device *dev,
10871141
struct ipoib_dev_priv *priv = ipoib_priv(dev);
10881142
struct ipoib_cm_data data = {};
10891143
struct ib_cm_req_param req = {};
1144+
#ifndef WITHOUT_ORACLE_EXTENSIONS
1145+
u16 caps = IPOIB_CM_PROTO_VER;
1146+
1147+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
1148+
caps |= IPOIB_CM_CAPS_IBCRC_AS_CSUM;
1149+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
10901150

10911151
data.qpn = cpu_to_be32(priv->qp->qp_num);
10921152
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
1153+
#ifndef WITHOUT_ORACLE_EXTENSIONS
1154+
data.sig = cpu_to_be16(IPOIB_CM_PROTO_SIG);
1155+
data.caps = cpu_to_be16(caps);
1156+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
10931157

10941158
req.primary_path = pathrec;
10951159
req.alternate_path = NULL;

drivers/infiniband/ulp/ipoib/ipoib_main.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ module_param_named(debug_level, ipoib_debug_level, int, 0644);
7878
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
7979
#endif
8080

81+
#ifndef WITHOUT_ORACLE_EXTENSIONS
82+
int cm_ibcrc_as_csum = 1;
83+
module_param_named(cm_ibcrc_as_csum, cm_ibcrc_as_csum, int, 0444);
84+
MODULE_PARM_DESC(cm_ibcrc_as_csum,
85+
"Indicates whether to utilize IB-CRC as CSUM in connected mode, (default: 1)");
86+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
87+
8188
struct ipoib_path_iter {
8289
struct net_device *dev;
8390
struct ipoib_path path;
@@ -231,8 +238,17 @@ static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_featu
231238
{
232239
struct ipoib_dev_priv *priv = ipoib_priv(dev);
233240

241+
#ifdef WITHOUT_ORACLE_EXTENSIONS
234242
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
235243
features &= ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
244+
#else
245+
if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) {
246+
features &= ~NETIF_F_TSO;
247+
if (!(cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM,
248+
&priv->flags)))
249+
features &= ~NETIF_F_IP_CSUM;
250+
}
251+
#endif /* WITHOUT_ORACLE_EXTENSIONS */
236252

237253
return features;
238254
}
@@ -546,7 +562,14 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
546562
netdev_update_features(dev);
547563
dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
548564
rtnl_unlock();
565+
#ifdef WITHOUT_ORACLE_EXTENSIONS
549566
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
567+
#else
568+
if (cm_ibcrc_as_csum && test_bit(IPOIB_FLAG_CSUM, &priv->flags))
569+
priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
570+
else
571+
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
572+
#endif /* WITHOUT_ORACLE_EXTENSIONS */
550573

551574
ipoib_flush_paths(dev);
552575
return (!rtnl_trylock()) ? -EBUSY : 0;
@@ -1855,6 +1878,9 @@ static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
18551878
priv->hca_caps = priv->ca->attrs.device_cap_flags;
18561879

18571880
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1881+
#ifndef WITHOUT_ORACLE_EXTENSIONS
1882+
set_bit(IPOIB_FLAG_CSUM, &priv->flags);
1883+
#endif /* !WITHOUT_ORACLE_EXTENSIONS */
18581884
priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
18591885

18601886
if (priv->hca_caps & IB_DEVICE_UD_TSO)

0 commit comments

Comments
 (0)