
Commit f5cedc8

catSully012 authored and davem330 committed
gve: Add transmit and receive support
Add support for passing traffic.

Signed-off-by: Catherine Sullivan <[email protected]>
Signed-off-by: Sagi Shahar <[email protected]>
Signed-off-by: Jon Olson <[email protected]>
Acked-by: Willem de Bruijn <[email protected]>
Reviewed-by: Luigi Rizzo <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 893ce44 commit f5cedc8

File tree

9 files changed, +2221 -5 lines changed

Documentation/networking/device_drivers/google/gve.rst

Lines changed: 30 additions & 0 deletions
@@ -42,6 +42,8 @@ The driver interacts with the device in the following ways:
     - See description below
 - Interrupts
     - See supported interrupts below
+- Transmit and Receive Queues
+    - See description below
 
 Registers
 ---------
@@ -80,3 +82,31 @@ Notification Block Interrupts
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The notification block interrupts are used to tell the driver to poll
 the queues associated with that interrupt.
+
+The handler for these irqs schedule the napi for that block to run
+and poll the queues.
+
+Traffic Queues
+--------------
+gVNIC's queues are composed of a descriptor ring and a buffer and are
+assigned to a notification block.
+
+The descriptor rings are power-of-two-sized ring buffers consisting of
+fixed-size descriptors. They advance their head pointer using a __be32
+doorbell located in Bar2. The tail pointers are advanced by consuming
+descriptors in-order and updating a __be32 counter. Both the doorbell
+and the counter overflow to zero.
+
+Each queue's buffers must be registered in advance with the device as a
+queue page list, and packet data can only be put in those pages.
+
+Transmit
+~~~~~~~~
+gve maps the buffers for transmit rings into a FIFO and copies the packets
+into the FIFO before sending them to the NIC.
+
+Receive
+~~~~~~~
+The buffers for receive rings are put into a data ring that is the same
+length as the descriptor ring and the head and tail pointers advance over
+the rings together.
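(Editorial aside, not part of the commit.) The Traffic Queues text above describes the bookkeeping used throughout the patch: free-running head and tail counters over a power-of-two ring, masked down to a slot index, with the __be32 doorbell and completion counter simply wrapping through zero. A minimal C sketch of that arithmetic, with hypothetical names that do not exist in the driver:

/* Toy illustration only; assumes kernel types from <linux/types.h>. */
#include <linux/types.h>

#define TOY_RING_SIZE	1024			/* must be a power of two */
#define TOY_RING_MASK	(TOY_RING_SIZE - 1)

struct toy_ring {
	u32 head;	/* free-running count of descriptors posted (doorbell) */
	u32 tail;	/* free-running count of descriptors consumed (counter) */
};

/* Slot the next descriptor lands in. */
static inline u32 toy_ring_slot(const struct toy_ring *r)
{
	return r->head & TOY_RING_MASK;
}

/* Outstanding descriptors; unsigned subtraction stays correct across wrap. */
static inline u32 toy_ring_in_use(const struct toy_ring *r)
{
	return r->head - r->tail;
}

Because the counters are never reset, occupancy falls out of unsigned subtraction even after either value wraps past 2^32, which is why the doorbell and event counter can "overflow to zero" with no special casing.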
drivers/net/ethernet/google/gve/Makefile

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Makefile for the Google virtual Ethernet (gve) driver
 
 obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_adminq.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_adminq.o

drivers/net/ethernet/google/gve/gve.h

Lines changed: 259 additions & 1 deletion
@@ -10,6 +10,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
+#include <linux/u64_stats_sync.h>
+#include "gve_desc.h"
 
 #ifndef PCI_VENDOR_ID_GOOGLE
 #define PCI_VENDOR_ID_GOOGLE	0x1ae0
@@ -20,18 +22,152 @@
 #define GVE_REGISTER_BAR	0
 #define GVE_DOORBELL_BAR	2
 
-/* 1 for management */
+/* Driver can alloc up to 2 segments for the header and 2 for the payload. */
+#define GVE_TX_MAX_IOVEC 4
+/* 1 for management, 1 for rx, 1 for tx */
 #define GVE_MIN_MSIX 3
 
+/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
+struct gve_rx_desc_queue {
+	struct gve_rx_desc *desc_ring; /* the descriptor ring */
+	dma_addr_t bus; /* the bus for the desc_ring */
+	u32 cnt; /* free-running total number of completed packets */
+	u32 fill_cnt; /* free-running total number of descriptors posted */
+	u32 mask; /* masks the cnt to the size of the ring */
+	u8 seqno; /* the next expected seqno for this desc*/
+};
+
+/* The page info for a single slot in the RX data queue */
+struct gve_rx_slot_page_info {
+	struct page *page;
+	void *page_address;
+	u32 page_offset; /* offset to write to in page */
+};
+
+/* A list of pages registered with the device during setup and used by a queue
+ * as buffers
+ */
+struct gve_queue_page_list {
+	u32 id; /* unique id */
+	u32 num_entries;
+	struct page **pages; /* list of num_entries pages */
+	dma_addr_t *page_buses; /* the dma addrs of the pages */
+};
+
+/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */
+struct gve_rx_data_queue {
+	struct gve_rx_data_slot *data_ring; /* read by NIC */
+	dma_addr_t data_bus; /* dma mapping of the slots */
+	struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
+	struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+	u32 mask; /* masks the cnt to the size of the ring */
+	u32 cnt; /* free-running total number of completed packets */
+};
+
+struct gve_priv;
+
+/* An RX ring that contains a power-of-two sized desc and data ring. */
+struct gve_rx_ring {
+	struct gve_priv *gve;
+	struct gve_rx_desc_queue desc;
+	struct gve_rx_data_queue data;
+	u64 rbytes; /* free-running bytes received */
+	u64 rpackets; /* free-running packets received */
+	u32 q_num; /* queue index */
+	u32 ntfy_id; /* notification block index */
+	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+	dma_addr_t q_resources_bus; /* dma address for the queue resources */
+	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+};
+
+/* A TX desc ring entry */
+union gve_tx_desc {
+	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
+	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
+};
+
+/* Tracks the memory in the fifo occupied by a segment of a packet */
+struct gve_tx_iovec {
+	u32 iov_offset; /* offset into this segment */
+	u32 iov_len; /* length */
+	u32 iov_padding; /* padding associated with this segment */
+};
+
+/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
+ * ring entry but only used for a pkt_desc not a seg_desc
+ */
+struct gve_tx_buffer_state {
+	struct sk_buff *skb; /* skb for this pkt */
+	struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+};
+
+/* A TX buffer - each queue has one */
+struct gve_tx_fifo {
+	void *base; /* address of base of FIFO */
+	u32 size; /* total size */
+	atomic_t available; /* how much space is still available */
+	u32 head; /* offset to write at */
+	struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */
+};
+
+/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */
+struct gve_tx_ring {
+	/* Cacheline 0 -- Accessed & dirtied during transmit */
+	struct gve_tx_fifo tx_fifo;
+	u32 req; /* driver tracked head pointer */
+	u32 done; /* driver tracked tail pointer */
+
+	/* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */
+	__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
+	u64 pkt_done; /* free-running - total packets completed */
+	u64 bytes_done; /* free-running - total bytes completed */
+
+	/* Cacheline 2 -- Read-mostly fields */
+	union gve_tx_desc *desc ____cacheline_aligned;
+	struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
+	struct netdev_queue *netdev_txq;
+	struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+	u32 mask; /* masks req and done down to queue size */
+
+	/* Slow-path fields */
+	u32 q_num ____cacheline_aligned; /* queue idx */
+	u32 stop_queue; /* count of queue stops */
+	u32 wake_queue; /* count of queue wakes */
+	u32 ntfy_id; /* notification block index */
+	dma_addr_t bus; /* dma address of the descr ring */
+	dma_addr_t q_resources_bus; /* dma address of the queue resources */
+	struct u64_stats_sync statss; /* sync stats for 32bit archs */
+} ____cacheline_aligned;
+
+/* Wraps the info for one irq including the napi struct and the queues
+ * associated with that irq.
+ */
 struct gve_notify_block {
 	__be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
 	char name[IFNAMSIZ + 16]; /* name registered with the kernel */
 	struct napi_struct napi; /* kernel napi struct for this block */
 	struct gve_priv *priv;
+	struct gve_tx_ring *tx; /* tx rings on this block */
+	struct gve_rx_ring *rx; /* rx rings on this block */
 } ____cacheline_aligned;
 
+/* Tracks allowed and current queue settings */
+struct gve_queue_config {
+	u16 max_queues;
+	u16 num_queues; /* current */
+};
+
+/* Tracks the available and used qpl IDs */
+struct gve_qpl_config {
+	u32 qpl_map_size; /* map memory size */
+	unsigned long *qpl_id_map; /* bitmap of used qpl ids */
+};
+
 struct gve_priv {
 	struct net_device *dev;
+	struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
+	struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
+	struct gve_queue_page_list *qpls; /* array of num qpls */
 	struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
 	dma_addr_t ntfy_block_bus;
 	struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
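(Editorial aside, not part of the diff.) gve_tx_fifo above is the flat copy buffer the documentation's Transmit section refers to: head is a byte offset that marches forward, available shrinks as packets are copied in, and a copy that would run off the end is padded out and restarted at offset zero, which is what gve_tx_iovec's iov_padding records. A rough sketch of the producer side of that allocation, with hypothetical names and under those assumptions:

/* Editorial sketch only; toy_tx_fifo_* does not exist in this commit. */
#include <linux/atomic.h>
#include <linux/types.h>

struct toy_tx_fifo {
	void *base;		/* start of the flat buffer */
	u32 size;		/* total bytes */
	u32 head;		/* offset where the next copy begins */
	atomic_t available;	/* bytes not yet consumed */
};

/*
 * Reserve room for one packet copy.  If the request would cross the end of
 * the FIFO, the bytes up to the end are counted as padding and the copy
 * restarts at offset 0.  Returns the offset to copy at, or -1 if full.
 */
static int toy_tx_fifo_alloc(struct toy_tx_fifo *fifo, u32 bytes)
{
	u32 pad = 0;
	u32 offset;

	if (fifo->head + bytes > fifo->size)
		pad = fifo->size - fifo->head;	/* skip the tail fragment */

	if (atomic_read(&fifo->available) < bytes + pad)
		return -1;

	atomic_sub(bytes + pad, &fifo->available);
	offset = pad ? 0 : fifo->head;
	fifo->head = (offset + bytes) % fifo->size;
	return offset;
}

The real driver also has to return space to the FIFO when completions arrive; this sketch shows only the producer side.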
@@ -41,14 +177,28 @@ struct gve_priv {
 	dma_addr_t counter_array_bus;
 
 	u16 num_event_counters;
+	u16 tx_desc_cnt; /* num desc per ring */
+	u16 rx_desc_cnt; /* num desc per ring */
+	u16 tx_pages_per_qpl; /* tx buffer length */
+	u16 rx_pages_per_qpl; /* rx buffer length */
+	u64 max_registered_pages;
+	u64 num_registered_pages; /* num pages registered with NIC */
+	u32 rx_copybreak; /* copy packets smaller than this */
+	u16 default_num_queues; /* default num queues to set up */
 
+	struct gve_queue_config tx_cfg;
+	struct gve_queue_config rx_cfg;
+	struct gve_qpl_config qpl_cfg; /* map used QPL ids */
 	u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
 
 	struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
 	__be32 __iomem *db_bar2; /* "array" of doorbells */
 	u32 msg_enable; /* level for netif* netdev print macros */
 	struct pci_dev *pdev;
 
+	/* metrics */
+	u32 tx_timeo_cnt;
+
 	/* Admin queue - see gve_adminq.h*/
 	union gve_adminq_command *adminq;
 	dma_addr_t adminq_bus_addr;
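(Editorial aside.) rx_copybreak above is the threshold below which a received frame is copied out of the registered page rather than handed up in place; copying keeps the queue-page-list page on the ring at the cost of a memcpy. A hypothetical sketch of the copy path only, assuming the usual napi_alloc_skb/skb_put_data pattern rather than this commit's actual rx code:

/* Editorial sketch only; toy_rx_copybreak is not a function from this commit. */
#include <linux/skbuff.h>

static struct sk_buff *toy_rx_copybreak(struct napi_struct *napi,
					void *va, u16 len)
{
	/* Small frame: allocate a fresh skb and copy the payload into it,
	 * so the QPL page can be reposted to the NIC immediately.
	 */
	struct sk_buff *skb = napi_alloc_skb(napi, len);

	if (!skb)
		return NULL;

	skb_put_data(skb, va, len);	/* copy payload and advance skb tail */
	return skb;
}

Frames at or above the threshold would instead attach the page itself to the skb, avoiding the copy but tying the page up until the stack releases it.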
@@ -132,4 +282,112 @@ static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
 {
 	return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
 }
+
+/* Returns the index into ntfy_blocks of the given tx ring's block
+ */
+static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+	return queue_idx;
+}
+
+/* Returns the index into ntfy_blocks of the given rx ring's block
+ */
+static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
+{
+	return (priv->num_ntfy_blks / 2) + queue_idx;
+}
+
+/* Returns the number of tx queue page lists
+ */
+static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
+{
+	return priv->tx_cfg.num_queues;
+}
+
+/* Returns the number of rx queue page lists
+ */
+static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
+{
+	return priv->rx_cfg.num_queues;
+}
+
+/* Returns a pointer to the next available tx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+{
+	int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
+				     priv->qpl_cfg.qpl_map_size);
+
+	/* we are out of tx qpls */
+	if (id >= gve_num_tx_qpls(priv))
+		return NULL;
+
+	set_bit(id, priv->qpl_cfg.qpl_id_map);
+	return &priv->qpls[id];
+}
+
+/* Returns a pointer to the next available rx qpl in the list of qpls
+ */
+static inline
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+{
+	int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
+				    priv->qpl_cfg.qpl_map_size,
+				    gve_num_tx_qpls(priv));
+
+	/* we are out of rx qpls */
+	if (id == priv->qpl_cfg.qpl_map_size)
+		return NULL;
+
+	set_bit(id, priv->qpl_cfg.qpl_id_map);
+	return &priv->qpls[id];
+}
+
+/* Unassigns the qpl with the given id
+ */
+static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
+{
+	clear_bit(id, priv->qpl_cfg.qpl_id_map);
+}
+
+/* Returns the correct dma direction for tx and rx qpls
+ */
+static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
+						      int id)
+{
+	if (id < gve_num_tx_qpls(priv))
+		return DMA_TO_DEVICE;
+	else
+		return DMA_FROM_DEVICE;
+}
+
+/* Returns true if the max mtu allows page recycling */
+static inline bool gve_can_recycle_pages(struct net_device *dev)
+{
+	/* We can't recycle the pages if we can't fit a packet into half a
+	 * page.
+	 */
+	return dev->max_mtu <= PAGE_SIZE / 2;
+}
+
+/* buffers */
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+		   enum dma_data_direction);
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+		   enum dma_data_direction);
+/* tx handling */
+netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv);
+void gve_tx_free_rings(struct gve_priv *priv);
+__be32 gve_tx_load_event_counter(struct gve_priv *priv,
+				 struct gve_tx_ring *tx);
+/* rx handling */
+void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx);
+bool gve_rx_poll(struct gve_notify_block *block, int budget);
+int gve_rx_alloc_rings(struct gve_priv *priv);
+void gve_rx_free_rings(struct gve_priv *priv);
+bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
+		       netdev_features_t feat);
 #endif /* _GVE_H_ */
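(Editorial aside.) The inline helpers above split the shared resources down the middle: tx queue i uses notification block i and a QPL id from the low half of the bitmap, while rx queue i uses block num_ntfy_blks/2 + i and an id from the high half. A hypothetical setup fragment showing how a tx ring might consume them; this is not the commit's gve_tx_alloc_rings(), only an illustration built on the declarations above:

/* Illustrative only; assumes the gve.h declarations added by this commit. */
#include <linux/errno.h>
#include "gve.h"

static int toy_setup_tx_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];

	tx->q_num = idx;
	tx->ntfy_id = gve_tx_idx_to_ntfy(priv, idx);	/* block idx for tx ring idx */

	/* Claim the first free QPL id; tx ids occupy [0, gve_num_tx_qpls()). */
	tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
	if (!tx->tx_fifo.qpl)
		return -ENOMEM;

	/* Let the notification block's napi handler find this ring. */
	priv->ntfy_blocks[tx->ntfy_id].tx = tx;
	return 0;
}

On teardown the id would go back to the bitmap via gve_unassign_qpl(priv, tx->tx_fifo.qpl->id).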
