
Commit 9cde945

Felix Fietkau authored and davem330 committed
bgmac: implement scatter/gather support
Always use software checksumming, since the hardware does not have any
checksum offload support. This significantly improves local TCP tx
performance.

Signed-off-by: Felix Fietkau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 45c9b3c commit 9cde945
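
For context on the checksum part of this change: because the controller has no checksum offload engine, the driver can still advertise NETIF_F_IP_CSUM/NETIF_F_IPV6_CSUM (so the stack hands it CHECKSUM_PARTIAL skbs) and then resolve the checksum in software on the transmit path. The following is a minimal sketch of that pattern only; example_xmit() is a hypothetical stand-in for the driver's real bgmac_dma_tx_add() and is not part of this commit.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical xmit handler illustrating the software-checksum fallback
 * this commit relies on; the real work of mapping skb_headlen(skb) and
 * each fragment into DMA descriptors is elided.
 */
static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
        /* No checksum offload in hardware: fill in the checksum on the
         * CPU before the buffer is handed to DMA.
         */
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                skb_checksum_help(skb);

        /* ... a real driver queues the skb to the TX ring here;
         * this sketch simply consumes it ...
         */
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}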

File tree

  • drivers/net/ethernet/broadcom

1 file changed: 121 additions, 43 deletions

drivers/net/ethernet/broadcom/bgmac.c

Lines changed: 121 additions & 43 deletions
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+                     int i, int len, u32 ctl0)
+{
+        struct bgmac_slot_info *slot;
+        struct bgmac_dma_desc *dma_desc;
+        u32 ctl1;
+
+        if (i == ring->num_slots - 1)
+                ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+        ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+        slot = &ring->slots[i];
+        dma_desc = &ring->cpu_base[i];
+        dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+        dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+        dma_desc->ctl0 = cpu_to_le32(ctl0);
+        dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
                                     struct bgmac_dma_ring *ring,
                                     struct sk_buff *skb)
 {
         struct device *dma_dev = bgmac->core->dma_dev;
         struct net_device *net_dev = bgmac->net_dev;
-        struct bgmac_dma_desc *dma_desc;
-        struct bgmac_slot_info *slot;
-        u32 ctl0, ctl1;
+        struct bgmac_slot_info *slot = &ring->slots[ring->end];
         int free_slots;
+        int nr_frags;
+        u32 flags;
+        int index = ring->end;
+        int i;
 
         if (skb->len > BGMAC_DESC_CTL1_LEN) {
                 bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-                goto err_stop_drop;
+                goto err_drop;
         }
 
+        if (skb->ip_summed == CHECKSUM_PARTIAL)
+                skb_checksum_help(skb);
+
+        nr_frags = skb_shinfo(skb)->nr_frags;
+
         if (ring->start <= ring->end)
                 free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
         else
                 free_slots = ring->start - ring->end;
-        if (free_slots == 1) {
+
+        if (free_slots <= nr_frags + 1) {
                 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
                 netif_stop_queue(net_dev);
                 return NETDEV_TX_BUSY;
         }
 
-        slot = &ring->slots[ring->end];
-        slot->skb = skb;
-        slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+        slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
                                         DMA_TO_DEVICE);
-        if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-                bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-                          ring->mmio_base);
-                goto err_stop_drop;
-        }
+        if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+                goto err_dma_head;
 
-        ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-        if (ring->end == ring->num_slots - 1)
-                ctl0 |= BGMAC_DESC_CTL0_EOT;
-        ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+        flags = BGMAC_DESC_CTL0_SOF;
+        if (!nr_frags)
+                flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
 
-        dma_desc = ring->cpu_base;
-        dma_desc += ring->end;
-        dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-        dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-        dma_desc->ctl0 = cpu_to_le32(ctl0);
-        dma_desc->ctl1 = cpu_to_le32(ctl1);
+        bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+        flags = 0;
+
+        for (i = 0; i < nr_frags; i++) {
+                struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+                int len = skb_frag_size(frag);
+
+                index = (index + 1) % BGMAC_TX_RING_SLOTS;
+                slot = &ring->slots[index];
+                slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+                                                  len, DMA_TO_DEVICE);
+                if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+                        goto err_dma;
+
+                if (i == nr_frags - 1)
+                        flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+                bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+        }
+
+        slot->skb = skb;
 
         netdev_sent_queue(net_dev, skb->len);
 
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
         /* Increase ring->end to point empty slot. We tell hardware the first
          * slot it should *not* read.
          */
-        if (++ring->end >= BGMAC_TX_RING_SLOTS)
-                ring->end = 0;
+        ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
         bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
                     ring->index_base +
                     ring->end * sizeof(struct bgmac_dma_desc));
 
-        /* Always keep one slot free to allow detecting bugged calls. */
-        if (--free_slots == 1)
+        free_slots -= nr_frags + 1;
+        if (free_slots < 8)
                 netif_stop_queue(net_dev);
 
         return NETDEV_TX_OK;
 
-err_stop_drop:
-        netif_stop_queue(net_dev);
+err_dma:
+        dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+                         DMA_TO_DEVICE);
+
+        while (i > 0) {
+                int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+                struct bgmac_slot_info *slot = &ring->slots[index];
+                u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+                int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+                dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+        }
+
+err_dma_head:
+        bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+                  ring->mmio_base);
+
+err_drop:
         dev_kfree_skb(skb);
         return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 
         while (ring->start != empty_slot) {
                 struct bgmac_slot_info *slot = &ring->slots[ring->start];
+                u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+                int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-                if (slot->skb) {
+                if (!slot->dma_addr) {
+                        bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+                                  ring->start, ring->end);
+                        goto next;
+                }
+
+                if (ctl1 & BGMAC_DESC_CTL0_SOF)
                         /* Unmap no longer used buffer */
-                        dma_unmap_single(dma_dev, slot->dma_addr,
-                                         slot->skb->len, DMA_TO_DEVICE);
-                        slot->dma_addr = 0;
+                        dma_unmap_single(dma_dev, slot->dma_addr, len,
+                                         DMA_TO_DEVICE);
+                else
+                        dma_unmap_page(dma_dev, slot->dma_addr, len,
+                                       DMA_TO_DEVICE);
 
+                if (slot->skb) {
                         bytes_compl += slot->skb->len;
                         pkts_compl++;
 
                         /* Free memory! :) */
                         dev_kfree_skb(slot->skb);
                         slot->skb = NULL;
-                } else {
-                        bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-                                  ring->start, ring->end);
                 }
 
+next:
+                slot->dma_addr = 0;
                 if (++ring->start >= BGMAC_TX_RING_SLOTS)
                         ring->start = 0;
                 freed = true;
         }
 
+        if (!pkts_compl)
+                return;
+
         netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
 
-        if (freed && netif_queue_stopped(bgmac->net_dev))
+        if (netif_queue_stopped(bgmac->net_dev))
                 netif_wake_queue(bgmac->net_dev);
 }
 
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
                                    struct bgmac_dma_ring *ring)
 {
         struct device *dma_dev = bgmac->core->dma_dev;
+        struct bgmac_dma_desc *dma_desc = ring->cpu_base;
         struct bgmac_slot_info *slot;
         int i;
 
         for (i = 0; i < ring->num_slots; i++) {
+                int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
                 slot = &ring->slots[i];
-                if (slot->skb) {
-                        if (slot->dma_addr)
-                                dma_unmap_single(dma_dev, slot->dma_addr,
-                                                 slot->skb->len, DMA_TO_DEVICE);
-                        dev_kfree_skb(slot->skb);
-                }
+                dev_kfree_skb(slot->skb);
+
+                if (!slot->dma_addr)
+                        continue;
+
+                if (slot->skb)
+                        dma_unmap_single(dma_dev, slot->dma_addr,
+                                         len, DMA_TO_DEVICE);
+                else
+                        dma_unmap_page(dma_dev, slot->dma_addr,
+                                       len, DMA_TO_DEVICE);
         }
 }
 
@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
                 goto err_dma_free;
         }
 
+        net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+        net_dev->hw_features = net_dev->features;
+        net_dev->vlan_features = net_dev->features;
+
         err = register_netdev(bgmac->net_dev);
         if (err) {
                 bgmac_err(bgmac, "Cannot register net device\n");
