Skip to content

Commit dd5f03b

Browse files
matanb10rolandd
authored andcommitted
IB/core: Ethernet L2 attributes in verbs/cm structures
This patch adds support for Ethernet L2 attributes in the verbs/cm/cma structures. When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority in a manner similar to how the IB L2 (and the L4 PKEY) attributes are used. Thus, those attributes were added to the following structures: * ib_ah_attr - added dmac * ib_qp_attr - added smac and vlan_id (sl remains vlan priority) * ib_wc - added smac, vlan_id * ib_sa_path_rec - added smac, dmac, vlan_id * cm_av - added smac and vlan_id For the path record structure, extra care was taken to avoid the new fields when packing it into wire format, so we don't break the IB CM and SA wire protocol. On the active side, the CM fills its internal structures from the path provided by the ULP. There, we add code that takes the ETH L2 attributes and places them into the CM Address Handle (struct cm_av). On the passive side, the CM fills its internal structures from the WC associated with the REQ message. There, we add code that takes the ETH L2 attributes from the WC. When the HW driver provides the required ETH L2 attributes in the WC, it sets the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core code checks for the presence of these flags, and in their absence does address resolution from the ib_init_ah_from_wc() helper function. ib_modify_qp_is_ok is also updated to consider the link layer. Some parameters are mandatory for the Ethernet link layer, while they are irrelevant for IB. Vendor drivers are modified to support the new function signature. Signed-off-by: Matan Barak <[email protected]> Signed-off-by: Or Gerlitz <[email protected]> Signed-off-by: Roland Dreier <[email protected]>
1 parent 7e22e91 commit dd5f03b

File tree

18 files changed

+343
-24
lines changed

18 files changed

+343
-24
lines changed

drivers/infiniband/core/addr.c

Lines changed: 94 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)
8686
}
8787
EXPORT_SYMBOL(rdma_addr_size);
8888

89+
static struct rdma_addr_client self;
90+
8991
void rdma_addr_register_client(struct rdma_addr_client *client)
9092
{
9193
atomic_set(&client->refcount, 1);
@@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
119121
}
120122
EXPORT_SYMBOL(rdma_copy_addr);
121123

122-
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
124+
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
125+
u16 *vlan_id)
123126
{
124127
struct net_device *dev;
125128
int ret = -EADDRNOTAVAIL;
@@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
142145
return ret;
143146

144147
ret = rdma_copy_addr(dev_addr, dev, NULL);
148+
if (vlan_id)
149+
*vlan_id = rdma_vlan_dev_vlan_id(dev);
145150
dev_put(dev);
146151
break;
147152

@@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
153158
&((struct sockaddr_in6 *) addr)->sin6_addr,
154159
dev, 1)) {
155160
ret = rdma_copy_addr(dev_addr, dev, NULL);
161+
if (vlan_id)
162+
*vlan_id = rdma_vlan_dev_vlan_id(dev);
156163
break;
157164
}
158165
}
@@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
238245
src_in->sin_addr.s_addr = fl4.saddr;
239246

240247
if (rt->dst.dev->flags & IFF_LOOPBACK) {
241-
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
248+
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
242249
if (!ret)
243250
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
244251
goto put;
@@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
286293
}
287294

288295
if (dst->dev->flags & IFF_LOOPBACK) {
289-
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
296+
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
290297
if (!ret)
291298
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
292299
goto put;
@@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
437444
}
438445
EXPORT_SYMBOL(rdma_addr_cancel);
439446

447+
struct resolve_cb_context {
448+
struct rdma_dev_addr *addr;
449+
struct completion comp;
450+
};
451+
452+
static void resolve_cb(int status, struct sockaddr *src_addr,
453+
struct rdma_dev_addr *addr, void *context)
454+
{
455+
memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
456+
rdma_dev_addr));
457+
complete(&((struct resolve_cb_context *)context)->comp);
458+
}
459+
460+
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
461+
u16 *vlan_id)
462+
{
463+
int ret = 0;
464+
struct rdma_dev_addr dev_addr;
465+
struct resolve_cb_context ctx;
466+
struct net_device *dev;
467+
468+
union {
469+
struct sockaddr _sockaddr;
470+
struct sockaddr_in _sockaddr_in;
471+
struct sockaddr_in6 _sockaddr_in6;
472+
} sgid_addr, dgid_addr;
473+
474+
475+
ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
476+
if (ret)
477+
return ret;
478+
479+
ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
480+
if (ret)
481+
return ret;
482+
483+
memset(&dev_addr, 0, sizeof(dev_addr));
484+
485+
ctx.addr = &dev_addr;
486+
init_completion(&ctx.comp);
487+
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
488+
&dev_addr, 1000, resolve_cb, &ctx);
489+
if (ret)
490+
return ret;
491+
492+
wait_for_completion(&ctx.comp);
493+
494+
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
495+
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
496+
if (!dev)
497+
return -ENODEV;
498+
if (vlan_id)
499+
*vlan_id = rdma_vlan_dev_vlan_id(dev);
500+
dev_put(dev);
501+
return ret;
502+
}
503+
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
504+
505+
/*
 * rdma_addr_find_smac_by_sgid - look up the source MAC (and optionally the
 * VLAN id) that belongs to a source GID.
 * @sgid:    source GID, converted to an IP address with rdma_gid2ip()
 * @smac:    output buffer, receives ETH_ALEN bytes of source MAC on success
 * @vlan_id: passed straight through to rdma_translate_ip(); may be NULL
 *
 * Returns 0 on success, or the error returned by rdma_gid2ip() or
 * rdma_translate_ip(). @smac is left untouched on failure.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	/* Storage large enough for either an IPv4 or an IPv6 socket address. */
	union {
		struct sockaddr     sa;
		struct sockaddr_in  sin;
		struct sockaddr_in6 sin6;
	} ip;
	struct rdma_dev_addr resolved;
	int err;

	err = rdma_gid2ip(&ip.sa, sgid);
	if (err)
		return err;

	memset(&resolved, 0, sizeof(resolved));
	err = rdma_translate_ip(&ip.sa, &resolved, vlan_id);
	if (!err)
		memcpy(smac, resolved.src_dev_addr, ETH_ALEN);

	return err;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
528+
440529
static int netevent_callback(struct notifier_block *self, unsigned long event,
441530
void *ctx)
442531
{
@@ -461,11 +550,13 @@ static int __init addr_init(void)
461550
return -ENOMEM;
462551

463552
register_netevent_notifier(&nb);
553+
rdma_addr_register_client(&self);
464554
return 0;
465555
}
466556

467557
/*
 * Module exit handler. Unregisters the file-local address client `self` and
 * the netevent notifier `nb` (both are registered in addr_init()), then
 * destroys the addr_wq workqueue.
 */
static void __exit addr_cleanup(void)
468558
{
559+
rdma_addr_unregister_client(&self);
469560
unregister_netevent_notifier(&nb);
470561
destroy_workqueue(addr_wq);
471562
}

drivers/infiniband/core/cm.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include <linux/sysfs.h>
4848
#include <linux/workqueue.h>
4949
#include <linux/kdev_t.h>
50+
#include <linux/etherdevice.h>
5051

5152
#include <rdma/ib_cache.h>
5253
#include <rdma/ib_cm.h>
@@ -177,6 +178,8 @@ struct cm_av {
177178
struct ib_ah_attr ah_attr;
178179
u16 pkey_index;
179180
u8 timeout;
181+
u8 valid;
182+
u8 smac[ETH_ALEN];
180183
};
181184

182185
struct cm_work {
@@ -346,6 +349,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
346349
grh, &av->ah_attr);
347350
}
348351

352+
int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
353+
{
354+
struct cm_id_private *cm_id_priv;
355+
356+
cm_id_priv = container_of(id, struct cm_id_private, id);
357+
358+
if (smac != NULL)
359+
memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
360+
361+
if (alt_smac != NULL)
362+
memcpy(cm_id_priv->alt_av.smac, alt_smac,
363+
sizeof(cm_id_priv->alt_av.smac));
364+
365+
return 0;
366+
}
367+
EXPORT_SYMBOL(ib_update_cm_av);
368+
349369
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
350370
{
351371
struct cm_device *cm_dev;
@@ -376,6 +396,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
376396
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
377397
&av->ah_attr);
378398
av->timeout = path->packet_life_time + 1;
399+
memcpy(av->smac, path->smac, sizeof(av->smac));
400+
401+
av->valid = 1;
379402
return 0;
380403
}
381404

@@ -1554,6 +1577,9 @@ static int cm_req_handler(struct cm_work *work)
15541577

15551578
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
15561579
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1580+
1581+
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1582+
work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
15571583
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
15581584
if (ret) {
15591585
ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -3500,6 +3526,30 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
35003526
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
35013527
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
35023528
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3529+
if (!cm_id_priv->av.valid)
3530+
return -EINVAL;
3531+
if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
3532+
qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
3533+
*qp_attr_mask |= IB_QP_VID;
3534+
}
3535+
if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
3536+
memcpy(qp_attr->smac, cm_id_priv->av.smac,
3537+
sizeof(qp_attr->smac));
3538+
*qp_attr_mask |= IB_QP_SMAC;
3539+
}
3540+
if (cm_id_priv->alt_av.valid) {
3541+
if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
3542+
qp_attr->alt_vlan_id =
3543+
cm_id_priv->alt_av.ah_attr.vlan_id;
3544+
*qp_attr_mask |= IB_QP_ALT_VID;
3545+
}
3546+
if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
3547+
memcpy(qp_attr->alt_smac,
3548+
cm_id_priv->alt_av.smac,
3549+
sizeof(qp_attr->alt_smac));
3550+
*qp_attr_mask |= IB_QP_ALT_SMAC;
3551+
}
3552+
}
35033553
qp_attr->path_mtu = cm_id_priv->path_mtu;
35043554
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
35053555
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);

drivers/infiniband/core/cma.c

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
340340
int ret;
341341

342342
if (addr->sa_family != AF_IB) {
343-
ret = rdma_translate_ip(addr, dev_addr);
343+
ret = rdma_translate_ip(addr, dev_addr, NULL);
344344
} else {
345345
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
346346
ret = 0;
@@ -603,6 +603,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
603603
{
604604
struct ib_qp_attr qp_attr;
605605
int qp_attr_mask, ret;
606+
union ib_gid sgid;
606607

607608
mutex_lock(&id_priv->qp_mutex);
608609
if (!id_priv->id.qp) {
@@ -625,6 +626,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
625626
if (ret)
626627
goto out;
627628

629+
ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
630+
qp_attr.ah_attr.grh.sgid_index, &sgid);
631+
if (ret)
632+
goto out;
633+
634+
if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
635+
== RDMA_TRANSPORT_IB &&
636+
rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
637+
== IB_LINK_LAYER_ETHERNET) {
638+
ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
639+
640+
if (ret)
641+
goto out;
642+
}
628643
if (conn_param)
629644
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
630645
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -725,6 +740,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
725740
else
726741
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
727742
qp_attr_mask);
743+
728744
if (qp_attr->qp_state == IB_QPS_RTR)
729745
qp_attr->rq_psn = id_priv->seq_num;
730746
break;
@@ -1266,6 +1282,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
12661282
struct rdma_id_private *listen_id, *conn_id;
12671283
struct rdma_cm_event event;
12681284
int offset, ret;
1285+
u8 smac[ETH_ALEN];
1286+
u8 alt_smac[ETH_ALEN];
1287+
u8 *psmac = smac;
1288+
u8 *palt_smac = alt_smac;
1289+
int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
1290+
RDMA_TRANSPORT_IB) &&
1291+
(rdma_port_get_link_layer(cm_id->device,
1292+
ib_event->param.req_rcvd.port) ==
1293+
IB_LINK_LAYER_ETHERNET));
12691294

12701295
listen_id = cm_id->context;
12711296
if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@@ -1310,12 +1335,29 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
13101335
if (ret)
13111336
goto err3;
13121337

1338+
if (is_iboe) {
1339+
if (ib_event->param.req_rcvd.primary_path != NULL)
1340+
rdma_addr_find_smac_by_sgid(
1341+
&ib_event->param.req_rcvd.primary_path->sgid,
1342+
psmac, NULL);
1343+
else
1344+
psmac = NULL;
1345+
if (ib_event->param.req_rcvd.alternate_path != NULL)
1346+
rdma_addr_find_smac_by_sgid(
1347+
&ib_event->param.req_rcvd.alternate_path->sgid,
1348+
palt_smac, NULL);
1349+
else
1350+
palt_smac = NULL;
1351+
}
13131352
/*
13141353
* Acquire mutex to prevent user executing rdma_destroy_id()
13151354
* while we're accessing the cm_id.
13161355
*/
13171356
mutex_lock(&lock);
1318-
if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
1357+
if (is_iboe)
1358+
ib_update_cm_av(cm_id, psmac, palt_smac);
1359+
if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
1360+
(conn_id->id.qp_type != IB_QPT_UD))
13191361
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
13201362
mutex_unlock(&lock);
13211363
mutex_unlock(&conn_id->handler_mutex);
@@ -1474,7 +1516,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
14741516
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
14751517
conn_id->state = RDMA_CM_CONNECT;
14761518

1477-
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
1519+
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
14781520
if (ret) {
14791521
mutex_unlock(&conn_id->handler_mutex);
14801522
rdma_destroy_id(new_cm_id);
@@ -1873,7 +1915,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
18731915
struct cma_work *work;
18741916
int ret;
18751917
struct net_device *ndev = NULL;
1876-
u16 vid;
1918+
18771919

18781920
work = kzalloc(sizeof *work, GFP_KERNEL);
18791921
if (!work)
@@ -1897,10 +1939,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
18971939
goto err2;
18981940
}
18991941

1900-
vid = rdma_vlan_dev_vlan_id(ndev);
1942+
route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
1943+
memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
1944+
memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
19011945

1902-
iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
1903-
iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
1946+
iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr,
1947+
route->path_rec->vlan_id);
1948+
iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr,
1949+
route->path_rec->vlan_id);
19041950

19051951
route->path_rec->hop_limit = 1;
19061952
route->path_rec->reversible = 1;

0 commit comments

Comments
 (0)