Skip to content

Commit 6991abc

Browse files
kwan-intc authored and jgunthorpe committed
IB/hfi1: Add functions to receive accelerated ipoib packets
Ipoib netdev will share receive contexts with existing VNIC netdev. To achieve that, a dummy netdev is allocated with hfi1_devdata to own the receive contexts, and ipoib and VNIC netdevs will be put on top of it. Each receive context is associated with a single NAPI object. This patch adds the functions to receive incoming packets for accelerated ipoib. Link: https://lore.kernel.org/r/[email protected] Reviewed-by: Mike Marciniszyn <[email protected]> Reviewed-by: Dennis Dalessandro <[email protected]> Signed-off-by: Sadanand Warrier <[email protected]> Signed-off-by: Grzegorz Andrejczuk <[email protected]> Signed-off-by: Kaike Wan <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 89dcaa3 commit 6991abc

File tree

7 files changed

+355
-2
lines changed

7 files changed

+355
-2
lines changed

drivers/infiniband/hw/hfi1/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ hfi1-y := \
2323
intr.o \
2424
iowait.o \
2525
ipoib_main.o \
26+
ipoib_rx.o \
2627
ipoib_tx.o \
2728
mad.o \
2829
mmu_rb.o \
2930
msix.o \
31+
netdev_rx.o \
3032
opfn.o \
3133
pcie.o \
3234
pio.o \

drivers/infiniband/hw/hfi1/driver.c

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright(c) 2015-2018 Intel Corporation.
2+
* Copyright(c) 2015-2020 Intel Corporation.
33
*
44
* This file is provided under a dual BSD/GPLv2 license. When using or
55
* redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
5454
#include <linux/module.h>
5555
#include <linux/prefetch.h>
5656
#include <rdma/ib_verbs.h>
57+
#include <linux/etherdevice.h>
5758

5859
#include "hfi.h"
5960
#include "trace.h"
@@ -63,6 +64,9 @@
6364
#include "vnic.h"
6465
#include "fault.h"
6566

67+
#include "ipoib.h"
68+
#include "netdev.h"
69+
6670
#undef pr_fmt
6771
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
6872

@@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet)
15501554
show_eflags_errs(packet);
15511555
}
15521556

1557+
/*
 * hfi1_ipoib_ib_rcv - receive handler for IB packets on a netdev context
 * @packet: received packet; its rcd, rhf, hdr, tlen, hlen and ebuf are used
 *
 * Looks up a net_device through hfi1_netdev_get_data() using the QPN from
 * the BTH, copies the payload into a new skb and hands it to the stack with
 * netif_receive_skb(). Packets with RHF error flags go to handle_eflags().
 * Packets with no matching netdev, a total length shorter than the bytes to
 * strip, or a failed skb allocation are counted as drops.
 */
static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
1558+
{
1559+
struct hfi1_ibport *ibp;
1560+
struct net_device *netdev;
1561+
struct hfi1_ctxtdata *rcd = packet->rcd;
1562+
struct napi_struct *napi = rcd->napi;
1563+
struct sk_buff *skb;
1564+
/*
 * container_of() relies on rcd->napi pointing at the napi member
 * embedded in a struct hfi1_netdev_rxq.
 */
struct hfi1_netdev_rxq *rxq = container_of(napi,
1565+
struct hfi1_netdev_rxq, napi);
1566+
u32 extra_bytes;
1567+
u32 tlen, qpnum;
1568+
bool do_work, do_cnp;
1569+
struct hfi1_ipoib_dev_priv *priv;
1570+
1571+
trace_hfi1_rcvhdr(packet);
1572+
1573+
hfi1_setup_ib_header(packet);
1574+
1575+
/* this handler always uses the OTH view of the header and no GRH */
packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
1576+
packet->grh = NULL;
1577+
1578+
if (unlikely(rhf_err_flags(packet->rhf))) {
1579+
handle_eflags(packet);
1580+
return;
1581+
}
1582+
1583+
/* the destination QPN is the id used to find the target netdev */
qpnum = ib_bth_get_qpn(packet->ohdr);
1584+
netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
1585+
if (!netdev)
1586+
goto drop_no_nd;
1587+
1588+
trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
1589+
1590+
/* handle congestion notifications */
1591+
do_work = hfi1_may_ecn(packet);
1592+
if (unlikely(do_work)) {
1593+
do_cnp = (packet->opcode != IB_OPCODE_CNP);
1594+
(void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
1595+
packet, do_cnp);
1596+
}
1597+
1598+
/*
1599+
* The split point is after the last byte of the DETH;
1600+
* strip the padding, CRC and ICRC.
1601+
* tlen is the whole packet length, so the header
1602+
* size must be subtracted as well.
1603+
*/
1604+
tlen = packet->tlen;
1605+
extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
1606+
packet->hlen;
1607+
/* checked before subtracting so the unsigned tlen cannot wrap */
if (unlikely(tlen < extra_bytes))
1608+
goto drop;
1609+
1610+
tlen -= extra_bytes;
1611+
1612+
skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
1613+
if (unlikely(!skb))
1614+
goto drop;
1615+
1616+
priv = hfi1_ipoib_priv(netdev);
1617+
hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);
1618+
1619+
skb->dev = netdev;
1620+
skb->pkt_type = PACKET_HOST;
1621+
netif_receive_skb(skb);
1622+
1623+
return;
1624+
1625+
/* netdev is known here, so the drop is charged to its stats too */
drop:
1626+
++netdev->stats.rx_dropped;
1627+
/* no netdev was found: only the port-level drop counter is bumped */
drop_no_nd:
1628+
ibp = rcd_to_iport(packet->rcd);
1629+
++ibp->rvp.n_pkt_drops;
1630+
}
1631+
15531632
/*
15541633
* The following functions are called by the interrupt handler. They are type
15551634
* specific handlers for each packet type.
@@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
17571836
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
17581837
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
17591838
};
1839+
1840+
/*
 * Receive handlers indexed by RHF receive type. IB packets are handled by
 * hfi1_ipoib_ib_rcv(), bypass packets by hfi1_vnic_bypass_rcv(), error
 * packets by process_receive_error(), and every other type by
 * process_receive_invalid().
 * NOTE(review): going by the name, this table is meant for the netdev
 * receive contexts; the place where it is installed is not visible here.
 */
const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
1841+
[RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
1842+
[RHF_RCV_TYPE_EAGER] = process_receive_invalid,
1843+
[RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
1844+
[RHF_RCV_TYPE_ERROR] = process_receive_error,
1845+
[RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
1846+
[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
1847+
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
1848+
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
1849+
};

drivers/infiniband/hw/hfi1/hfi.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
233233
intr_handler fast_handler;
234234
/** slow handler */
235235
intr_handler slow_handler;
236+
/* napi pointer associated with netdev */
237+
struct napi_struct *napi;
236238
/* verbs rx_stats per rcd */
237239
struct hfi1_opcode_stats_perctx *opstats;
238240
/* clear interrupt mask */
@@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
985987
struct hfi1_pkt_state *ps,
986988
struct rvt_swqe *wqe);
987989
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
988-
990+
extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
989991

990992
/* return values for the RHF receive functions */
991993
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -1417,6 +1419,7 @@ struct hfi1_devdata {
14171419
struct hfi1_vnic_data vnic;
14181420
/* Lock to protect IRQ SRC register access */
14191421
spinlock_t irq_src_lock;
1422+
struct net_device *dummy_netdev;
14201423

14211424
/* Keeps track of IPoIB RSM rule users */
14221425
atomic_t ipoib_rsm_usr_num;

drivers/infiniband/hw/hfi1/ipoib.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@
2222

2323
#include "hfi.h"
2424
#include "iowait.h"
25+
#include "netdev.h"
2526

2627
#include <rdma/ib_verbs.h>
2728

2829
#define HFI1_IPOIB_ENTROPY_SHIFT 24
2930

3031
#define HFI1_IPOIB_TXREQ_NAME_LEN 32
3132

33+
#define HFI1_IPOIB_PSEUDO_LEN 20
3234
#define HFI1_IPOIB_ENCAP_LEN 4
3335

3436
struct hfi1_ipoib_dev_priv;
@@ -118,6 +120,19 @@ hfi1_ipoib_priv(const struct net_device *dev)
118120
return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
119121
}
120122

123+
/*
 * hfi1_ipoib_update_rx_netstats - add to the current CPU's RX counters
 * @priv: IPoIB private data holding the per-CPU netstats
 * @packets: amount added to rx_packets
 * @bytes: amount added to rx_bytes
 *
 * The two counters are updated between u64_stats_update_begin() and
 * u64_stats_update_end() on the per-CPU syncp.
 */
static inline void
124+
hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
125+
u64 packets,
126+
u64 bytes)
127+
{
128+
struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
129+
130+
u64_stats_update_begin(&netstats->syncp);
131+
netstats->rx_packets += packets;
132+
netstats->rx_bytes += bytes;
133+
u64_stats_update_end(&netstats->syncp);
134+
}
135+
121136
static inline void
122137
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
123138
u64 packets,
@@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
142157
void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
143158
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
144159

160+
struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
161+
int size, void *data);
162+
145163
int hfi1_ipoib_rn_get_params(struct ib_device *device,
146164
u8 port_num,
147165
enum rdma_netdev_t type,

drivers/infiniband/hw/hfi1/ipoib_rx.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2+
/*
3+
* Copyright(c) 2020 Intel Corporation.
4+
*
5+
*/
6+
7+
#include "netdev.h"
8+
#include "ipoib.h"
9+
10+
#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
11+
12+
/*
 * copy_ipoib_buf - fill an skb from a receive buffer
 * @skb: destination skb; must have at least @size bytes of tailroom
 * @data: source buffer; its first two bytes are read as the protocol
 * @size: number of bytes copied from @data
 *
 * Appends @size bytes to the skb, sets mac_header to
 * HFI1_IPOIB_PSEUDO_LEN and then pulls HFI1_IPOIB_ENCAP_LEN bytes off the
 * front of the data.
 */
static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
13+
{
14+
void *dst_data;
15+
16+
skb_checksum_none_assert(skb);
17+
/* the value is stored as read, with no byte-order conversion */
skb->protocol = *((__be16 *)data);
18+
19+
dst_data = skb_put(skb, size);
20+
memcpy(dst_data, data, size);
21+
skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
22+
/* skip the encapsulation bytes at the start of the copied data */
skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
23+
}
24+
25+
/*
 * prepare_frag_skb - build an skb on top of a napi page fragment
 * @napi: napi object, used only for the napi_alloc_skb() fallback
 * @size: number of data bytes the skb must hold
 *
 * Return: an skb with HFI1_IPOIB_SKB_PAD bytes of headroom reserved; the
 * result of napi_alloc_skb() when the fragment allocation fails; or NULL
 * when build_skb() fails (the fragment is freed in that case).
 */
static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
26+
{
27+
struct sk_buff *skb;
28+
/* data area: payload plus headroom, rounded up by SKB_DATA_ALIGN() */
int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
29+
void *frag;
30+
31+
/* leave room for the skb_shared_info as well */
skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
32+
skb_size = SKB_DATA_ALIGN(skb_size);
33+
frag = napi_alloc_frag(skb_size);
34+
35+
/* no fragment available: fall back to a regular napi skb */
if (unlikely(!frag))
36+
return napi_alloc_skb(napi, size);
37+
38+
skb = build_skb(frag, skb_size);
39+
40+
if (unlikely(!skb)) {
41+
skb_free_frag(frag);
42+
return NULL;
43+
}
44+
45+
skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
46+
return skb;
47+
}
48+
49+
/*
 * hfi1_ipoib_prepare_skb - allocate an skb and copy a received buffer in
 * @rxq: receive queue whose napi object is used for the allocation
 * @size: number of bytes to copy from @data
 * @data: source buffer
 *
 * The skb is sized for @size plus HFI1_IPOIB_ENCAP_LEN bytes, then filled
 * by copy_ipoib_buf().
 *
 * Return: the filled skb, or NULL if the allocation failed.
 */
struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
50+
int size, void *data)
51+
{
52+
struct napi_struct *napi = &rxq->napi;
53+
int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
54+
struct sk_buff *skb;
55+
56+
/*
57+
* Payloads that fit in one page together with the skb overhead are
58+
* allocated from the napi skb cache; larger ones try the napi frag cache.
59+
*/
60+
if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
61+
skb = napi_alloc_skb(napi, skb_size);
62+
else
63+
skb = prepare_frag_skb(napi, skb_size);
64+
65+
if (unlikely(!skb))
66+
return NULL;
67+
68+
copy_ipoib_buf(skb, data, size);
69+
70+
return skb;
71+
}

drivers/infiniband/hw/hfi1/netdev.h

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
2+
/*
3+
* Copyright(c) 2020 Intel Corporation.
4+
*
5+
*/
6+
7+
#ifndef HFI1_NETDEV_H
8+
#define HFI1_NETDEV_H
9+
10+
#include "hfi.h"
11+
12+
#include <linux/netdevice.h>
13+
#include <linux/xarray.h>
14+
15+
/**
16+
* struct hfi1_netdev_rxq - Receive queue for the HFI
17+
* dummy netdev. Both IPoIB and VNIC netdevices work on
18+
* top of this device.
19+
* @napi: napi object
20+
* @priv: pointer to the struct hfi1_netdev_priv
21+
* @rcd: pointer to the receive context data
22+
*/
23+
struct hfi1_netdev_rxq {
24+
struct napi_struct napi;
25+
struct hfi1_netdev_priv *priv;
26+
struct hfi1_ctxtdata *rcd;
27+
};
28+
29+
/*
30+
* Number of netdev contexts used. Ensure it is less than or equal to
31+
* max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
32+
*/
33+
#define HFI1_MAX_NETDEV_CTXTS 8
34+
35+
/* Number of NETDEV RSM entries */
36+
#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
37+
38+
/**
39+
* struct hfi1_netdev_priv - data required to set up and run HFI netdev.
40+
* @dd: hfi1_devdata
41+
* @rxq: pointer to dummy netdev receive queues.
42+
* @num_rx_q: number of receive queues
43+
* @rmt_start: first free index in RMT Array
44+
* @msix_start: first free MSI-X interrupt vector.
45+
* @dev_tbl: netdev table for unique identifier VNIC and IPoIB VLANs.
46+
* @enabled: atomic counter of netdevs enabling receive queues.
47+
* When 0 NAPI will be disabled.
48+
* @netdevs: atomic counter of netdevs using dummy netdev.
49+
* When 0 receive queues will be freed.
50+
*
* NOTE(review): @msix_start has no matching member in the struct below;
* confirm whether the member or the description should be dropped.
*/
51+
struct hfi1_netdev_priv {
52+
struct hfi1_devdata *dd;
53+
struct hfi1_netdev_rxq *rxq;
54+
int num_rx_q;
55+
int rmt_start;
56+
struct xarray dev_tbl;
57+
/* count of enabled napi polls */
58+
atomic_t enabled;
59+
/* count of netdevs on top */
60+
atomic_t netdevs;
61+
};
62+
63+
/*
 * hfi1_netdev_priv - get the private data that follows a net_device
 * @dev: the net_device
 *
 * Return: the address immediately after *@dev, cast to
 * struct hfi1_netdev_priv. This assumes the caller allocated the private
 * struct directly behind the net_device.
 */
static inline
64+
struct hfi1_netdev_priv *hfi1_netdev_priv(struct net_device *dev)
65+
{
66+
return (struct hfi1_netdev_priv *)&dev[1];
67+
}
68+
69+
/*
 * hfi1_netdev_ctxt_count - number of receive queues of the dummy netdev
 * @dd: hfi1_devdata; dd->dummy_netdev is dereferenced without a NULL check
 *
 * Return: num_rx_q from the dummy netdev's private data.
 */
static inline
70+
int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
71+
{
72+
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
73+
74+
return priv->num_rx_q;
75+
}
76+
77+
/*
 * hfi1_netdev_get_ctxt - receive context of one dummy-netdev receive queue
 * @dd: hfi1_devdata; dd->dummy_netdev is dereferenced without a NULL check
 * @ctxt: index into the rxq array; not range-checked here
 *
 * Return: the rcd pointer stored in rxq[@ctxt].
 */
static inline
78+
struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
79+
{
80+
struct hfi1_netdev_priv *priv = hfi1_netdev_priv(dd->dummy_netdev);
81+
82+
return priv->rxq[ctxt].rcd;
83+
}
84+
85+
int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
86+
void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
87+
void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
88+
void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
89+
90+
#endif /* HFI1_NETDEV_H */

0 commit comments

Comments
 (0)