Commit f9c589e

matnyman authored and gregkh committed
xhci: TD-fragment, align the unsplittable case with a bounce buffer
If the last trb before a link is not packet size aligned, and is not splittable, then use a bounce buffer for that chunk of max packet size unalignable data.

Allocate a max packet size bounce buffer for every segment of a bulk endpoint ring at the same time as allocating the ring.

If we need to align the data before the link trb in that segment, then copy the data to the segment bounce buffer, dma map it, and enqueue it. Once the td finishes, or is cancelled, unmap it.

For in transfers we need to first map the bounce buffer, then queue it; after it finishes, copy the bounce buffer to the original sg list, and finally unmap it.

Signed-off-by: Mathias Nyman <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 474ed23 commit f9c589e
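
For reference, the split-or-bounce decision made by xhci_align_td() in the diff below comes down to simple arithmetic on the bytes already enqueued and the endpoint's max packet size. The following is a minimal standalone C sketch of that arithmetic only; the function name align_last_trb() and the numbers in main() are illustrative, and the real driver additionally copies the data, DMA-maps the segment bounce buffer, and records it in the TD.

/*
 * Standalone sketch of the TD-fragment alignment decision added by this
 * commit (see xhci_align_td() in drivers/usb/host/xhci-ring.c below).
 * Names and values here are illustrative only.
 */
#include <stdio.h>

/* Returns the length to queue in the last normal TRB before a link TRB. */
static unsigned int align_last_trb(unsigned int enqd_len,
                                   unsigned int trb_buff_len,
                                   unsigned int max_pkt,
                                   unsigned int full_len,
                                   int *use_bounce)
{
        unsigned int unalign = (enqd_len + trb_buff_len) % max_pkt;
        unsigned int new_len;

        *use_bounce = 0;
        if (unalign == 0)
                return trb_buff_len;    /* already packet size aligned */

        if (trb_buff_len > unalign)     /* splittable: just shrink the TRB */
                return trb_buff_len - unalign;

        /* unsplittable: bounce the chunk that restores packet alignment */
        *use_bounce = 1;
        new_len = max_pkt - (enqd_len % max_pkt);
        if (new_len > full_len - enqd_len)      /* don't run past the transfer */
                new_len = full_len - enqd_len;
        return new_len;
}

int main(void)
{
        int bounce;
        /* hypothetical transfer: 512-byte packets, 700 of 1000 bytes queued */
        unsigned int len = align_last_trb(700, 100, 512, 1000, &bounce);

        printf("queue %u bytes via %s\n", len, bounce ? "bounce buffer" : "sg");
        return 0;
}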

File tree: 4 files changed, +155 -40 lines changed


drivers/usb/host/xhci-mem.c

Lines changed: 46 additions & 28 deletions
@@ -37,7 +37,9 @@
  * "All components of all Command and Transfer TRBs shall be initialized to '0'"
  */
 static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
-                unsigned int cycle_state, gfp_t flags)
+                unsigned int cycle_state,
+                unsigned int max_packet,
+                gfp_t flags)
 {
         struct xhci_segment *seg;
         dma_addr_t dma;
@@ -53,6 +55,14 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
                 return NULL;
         }
 
+        if (max_packet) {
+                seg->bounce_buf = kzalloc(max_packet, flags | GFP_DMA);
+                if (!seg->bounce_buf) {
+                        dma_pool_free(xhci->segment_pool, seg->trbs, dma);
+                        kfree(seg);
+                        return NULL;
+                }
+        }
         /* If the cycle state is 0, set the cycle bit to 1 for all the TRBs */
         if (cycle_state == 0) {
                 for (i = 0; i < TRBS_PER_SEGMENT; i++)
@@ -70,6 +80,7 @@ static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
                 dma_pool_free(xhci->segment_pool, seg->trbs, seg->dma);
                 seg->trbs = NULL;
         }
+        kfree(seg->bounce_buf);
         kfree(seg);
 }
 
@@ -317,11 +328,11 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring,
 static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci,
                 struct xhci_segment **first, struct xhci_segment **last,
                 unsigned int num_segs, unsigned int cycle_state,
-                enum xhci_ring_type type, gfp_t flags)
+                enum xhci_ring_type type, unsigned int max_packet, gfp_t flags)
 {
         struct xhci_segment *prev;
 
-        prev = xhci_segment_alloc(xhci, cycle_state, flags);
+        prev = xhci_segment_alloc(xhci, cycle_state, max_packet, flags);
         if (!prev)
                 return -ENOMEM;
         num_segs--;
@@ -330,7 +341,7 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci,
         while (num_segs > 0) {
                 struct xhci_segment *next;
 
-                next = xhci_segment_alloc(xhci, cycle_state, flags);
+                next = xhci_segment_alloc(xhci, cycle_state, max_packet, flags);
                 if (!next) {
                         prev = *first;
                         while (prev) {
@@ -360,7 +371,7 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci,
  */
 static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
                 unsigned int num_segs, unsigned int cycle_state,
-                enum xhci_ring_type type, gfp_t flags)
+                enum xhci_ring_type type, unsigned int max_packet, gfp_t flags)
 {
         struct xhci_ring *ring;
         int ret;
@@ -370,13 +381,15 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
                 return NULL;
 
         ring->num_segs = num_segs;
+        ring->bounce_buf_len = max_packet;
         INIT_LIST_HEAD(&ring->td_list);
         ring->type = type;
         if (num_segs == 0)
                 return ring;
 
         ret = xhci_alloc_segments_for_ring(xhci, &ring->first_seg,
-                        &ring->last_seg, num_segs, cycle_state, type, flags);
+                        &ring->last_seg, num_segs, cycle_state, type,
+                        max_packet, flags);
         if (ret)
                 goto fail;
 
@@ -470,7 +483,8 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
                         ring->num_segs : num_segs_needed;
 
         ret = xhci_alloc_segments_for_ring(xhci, &first, &last,
-                        num_segs, ring->cycle_state, ring->type, flags);
+                        num_segs, ring->cycle_state, ring->type,
+                        ring->bounce_buf_len, flags);
         if (ret)
                 return -ENOMEM;
 
@@ -652,7 +666,8 @@ struct xhci_ring *xhci_stream_id_to_ring(
  */
 struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
                 unsigned int num_stream_ctxs,
-                unsigned int num_streams, gfp_t mem_flags)
+                unsigned int num_streams,
+                unsigned int max_packet, gfp_t mem_flags)
 {
         struct xhci_stream_info *stream_info;
         u32 cur_stream;
@@ -704,9 +719,11 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
          * and add their segment DMA addresses to the radix tree.
          * Stream 0 is reserved.
          */
+
         for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
                 stream_info->stream_rings[cur_stream] =
-                        xhci_ring_alloc(xhci, 2, 1, TYPE_STREAM, mem_flags);
+                        xhci_ring_alloc(xhci, 2, 1, TYPE_STREAM, max_packet,
+                                        mem_flags);
                 cur_ring = stream_info->stream_rings[cur_stream];
                 if (!cur_ring)
                         goto cleanup_rings;
@@ -1003,7 +1020,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
         }
 
         /* Allocate endpoint 0 ring */
-        dev->eps[0].ring = xhci_ring_alloc(xhci, 2, 1, TYPE_CTRL, flags);
+        dev->eps[0].ring = xhci_ring_alloc(xhci, 2, 1, TYPE_CTRL, 0, flags);
         if (!dev->eps[0].ring)
                 goto fail;
 
@@ -1434,22 +1451,6 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
                 return -EINVAL;
 
         ring_type = usb_endpoint_type(&ep->desc);
-        /* Set up the endpoint ring */
-        virt_dev->eps[ep_index].new_ring =
-                xhci_ring_alloc(xhci, 2, 1, ring_type, mem_flags);
-        if (!virt_dev->eps[ep_index].new_ring) {
-                /* Attempt to use the ring cache */
-                if (virt_dev->num_rings_cached == 0)
-                        return -ENOMEM;
-                virt_dev->num_rings_cached--;
-                virt_dev->eps[ep_index].new_ring =
-                        virt_dev->ring_cache[virt_dev->num_rings_cached];
-                virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
-                xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
-                                        1, ring_type);
-        }
-        virt_dev->eps[ep_index].skip = false;
-        ep_ring = virt_dev->eps[ep_index].new_ring;
 
         /*
          * Get values to fill the endpoint context, mostly from ep descriptor.
@@ -1479,6 +1480,23 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
         if ((xhci->hci_version > 0x100) && HCC2_LEC(xhci->hcc_params2))
                 mult = 0;
 
+        /* Set up the endpoint ring */
+        virt_dev->eps[ep_index].new_ring =
+                xhci_ring_alloc(xhci, 2, 1, ring_type, max_packet, mem_flags);
+        if (!virt_dev->eps[ep_index].new_ring) {
+                /* Attempt to use the ring cache */
+                if (virt_dev->num_rings_cached == 0)
+                        return -ENOMEM;
+                virt_dev->num_rings_cached--;
+                virt_dev->eps[ep_index].new_ring =
+                        virt_dev->ring_cache[virt_dev->num_rings_cached];
+                virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
+                xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
+                                        1, ring_type);
+        }
+        virt_dev->eps[ep_index].skip = false;
+        ep_ring = virt_dev->eps[ep_index].new_ring;
+
         /* Fill the endpoint context */
         ep_ctx->ep_info = cpu_to_le32(EP_MAX_ESIT_PAYLOAD_HI(max_esit_payload) |
                         EP_INTERVAL(interval) |
@@ -2409,7 +2427,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
                 goto fail;
 
         /* Set up the command ring to have one segments for now. */
-        xhci->cmd_ring = xhci_ring_alloc(xhci, 1, 1, TYPE_COMMAND, flags);
+        xhci->cmd_ring = xhci_ring_alloc(xhci, 1, 1, TYPE_COMMAND, 0, flags);
         if (!xhci->cmd_ring)
                 goto fail;
         xhci_dbg_trace(xhci, trace_xhci_dbg_init,
@@ -2454,7 +2472,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
          */
         xhci_dbg_trace(xhci, trace_xhci_dbg_init, "// Allocating event ring");
         xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, 1, TYPE_EVENT,
-                                        flags);
+                                        0, flags);
         if (!xhci->event_ring)
                 goto fail;
         if (xhci_check_trb_in_td_math(xhci) < 0)

drivers/usb/host/xhci-ring.c

Lines changed: 96 additions & 10 deletions
@@ -66,6 +66,7 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 #include "xhci.h"
 #include "xhci-trace.h"
 #include "xhci-mtk.h"
@@ -626,6 +627,31 @@ static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci,
         }
 }
 
+void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, struct xhci_ring *ring,
+                                 struct xhci_td *td)
+{
+        struct device *dev = xhci_to_hcd(xhci)->self.controller;
+        struct xhci_segment *seg = td->bounce_seg;
+        struct urb *urb = td->urb;
+
+        if (!seg || !urb)
+                return;
+
+        if (usb_urb_dir_out(urb)) {
+                dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len,
+                                 DMA_TO_DEVICE);
+                return;
+        }
+
+        /* for in tranfers we need to copy the data from bounce to sg */
+        sg_pcopy_from_buffer(urb->sg, urb->num_mapped_sgs, seg->bounce_buf,
+                             seg->bounce_len, seg->bounce_offs);
+        dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len,
+                         DMA_FROM_DEVICE);
+        seg->bounce_len = 0;
+        seg->bounce_offs = 0;
+}
+
 /*
  * When we get a command completion for a Stop Endpoint Command, we need to
  * unlink any cancelled TDs from the ring. There are two ways to do that:
@@ -745,6 +771,8 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
                 /* Doesn't matter what we pass for status, since the core will
                  * just overwrite it (because the URB has been unlinked).
                  */
+                if (ep_ring && cur_td->bounce_seg)
+                        xhci_unmap_td_bounce_buffer(xhci, ep_ring, cur_td);
                 xhci_giveback_urb_in_irq(xhci, cur_td, 0);
 
                 /* Stop processing the cancelled list if the watchdog timer is
@@ -767,6 +795,9 @@ static void xhci_kill_ring_urbs(struct xhci_hcd *xhci, struct xhci_ring *ring)
                 list_del_init(&cur_td->td_list);
                 if (!list_empty(&cur_td->cancelled_td_list))
                         list_del_init(&cur_td->cancelled_td_list);
+
+                if (cur_td->bounce_seg)
+                        xhci_unmap_td_bounce_buffer(xhci, ring, cur_td);
                 xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN);
         }
 }
@@ -1865,6 +1896,10 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td,
         urb = td->urb;
         urb_priv = urb->hcpriv;
 
+        /* if a bounce buffer was used to align this td then unmap it */
+        if (td->bounce_seg)
+                xhci_unmap_td_bounce_buffer(xhci, ep_ring, td);
+
         /* Do one last check of the actual transfer length.
          * If the host controller said we transferred more data than the buffer
          * length, urb->actual_length will be a very big number (since it's
@@ -3116,11 +3151,14 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred,
         return (total_packet_count - ((transferred + trb_buff_len) / maxp));
 }
 
+
 static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len,
-                         u32 *trb_buff_len)
+                         u32 *trb_buff_len, struct xhci_segment *seg)
 {
+        struct device *dev = xhci_to_hcd(xhci)->self.controller;
         unsigned int unalign;
         unsigned int max_pkt;
+        u32 new_buff_len;
 
         max_pkt = GET_MAX_PACKET(usb_endpoint_maxp(&urb->ep->desc));
         unalign = (enqd_len + *trb_buff_len) % max_pkt;
@@ -3129,11 +3167,48 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len,
         if (unalign == 0)
                 return 0;
 
+        xhci_dbg(xhci, "Unaligned %d bytes, buff len %d\n",
+                 unalign, *trb_buff_len);
+
         /* is the last nornal TRB alignable by splitting it */
         if (*trb_buff_len > unalign) {
                 *trb_buff_len -= unalign;
+                xhci_dbg(xhci, "split align, new buff len %d\n", *trb_buff_len);
                 return 0;
         }
+
+        /*
+         * We want enqd_len + trb_buff_len to sum up to a number aligned to
+         * number which is divisible by the endpoint's wMaxPacketSize. IOW:
+         * (size of currently enqueued TRBs + remainder) % wMaxPacketSize == 0.
+         */
+        new_buff_len = max_pkt - (enqd_len % max_pkt);
+
+        if (new_buff_len > (urb->transfer_buffer_length - enqd_len))
+                new_buff_len = (urb->transfer_buffer_length - enqd_len);
+
+        /* create a max max_pkt sized bounce buffer pointed to by last trb */
+        if (usb_urb_dir_out(urb)) {
+                sg_pcopy_to_buffer(urb->sg, urb->num_mapped_sgs,
+                                   seg->bounce_buf, new_buff_len, enqd_len);
+                seg->bounce_dma = dma_map_single(dev, seg->bounce_buf,
+                                                 max_pkt, DMA_TO_DEVICE);
+        } else {
+                seg->bounce_dma = dma_map_single(dev, seg->bounce_buf,
+                                                 max_pkt, DMA_FROM_DEVICE);
+        }
+
+        if (dma_mapping_error(dev, seg->bounce_dma)) {
+                /* try without aligning. Some host controllers survive */
+                xhci_warn(xhci, "Failed mapping bounce buffer, not aligning\n");
+                return 0;
+        }
+        *trb_buff_len = new_buff_len;
+        seg->bounce_len = new_buff_len;
+        seg->bounce_offs = enqd_len;
+
+        xhci_dbg(xhci, "Bounce align, new buff len %d\n", *trb_buff_len);
+
         return 1;
 }
 
@@ -3152,9 +3227,9 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
         unsigned int num_trbs;
         unsigned int start_cycle, num_sgs = 0;
         unsigned int enqd_len, block_len, trb_buff_len, full_len;
-        int ret;
+        int sent_len, ret;
         u32 field, length_field, remainder;
-        u64 addr;
+        u64 addr, send_addr;
 
         ring = xhci_urb_to_transfer_ring(xhci, urb);
         if (!ring)
@@ -3194,6 +3269,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
          */
         start_trb = &ring->enqueue->generic;
         start_cycle = ring->cycle_state;
+        send_addr = addr;
 
         /* Queue the TRBs, even if they are zero-length */
         for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) {
@@ -3222,10 +3298,16 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                         if (last_trb(xhci, ring, ring->enq_seg,
                                      ring->enqueue + 1)) {
                                 if (xhci_align_td(xhci, urb, enqd_len,
-                                                  &trb_buff_len))
-                                        xhci_dbg(xhci, "TRB align fail\n");
+                                                  &trb_buff_len,
+                                                  ring->enq_seg)) {
+                                        send_addr = ring->enq_seg->bounce_dma;
+                                        /* assuming TD won't span 2 segs */
+                                        td->bounce_seg = ring->enq_seg;
+                                }
                         }
-                } else {
+                }
+                if (enqd_len + trb_buff_len >= full_len) {
+                        field &= ~TRB_CHAIN;
                         field |= TRB_IOC;
                         more_trbs_coming = false;
                         td->last_trb = ring->enqueue;
@@ -3244,23 +3326,27 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                         TRB_INTR_TARGET(0);
 
                 queue_trb(xhci, ring, more_trbs_coming | need_zero_pkt,
-                                lower_32_bits(addr),
-                                upper_32_bits(addr),
+                                lower_32_bits(send_addr),
+                                upper_32_bits(send_addr),
                                 length_field,
                                 field);
 
                 addr += trb_buff_len;
-                block_len -= trb_buff_len;
+                sent_len = trb_buff_len;
 
-                if (sg && block_len == 0) {
+                while (sg && sent_len >= block_len) {
                         /* New sg entry */
                         --num_sgs;
+                        sent_len -= block_len;
                         if (num_sgs != 0) {
                                 sg = sg_next(sg);
                                 block_len = sg_dma_len(sg);
                                 addr = (u64) sg_dma_address(sg);
+                                addr += sent_len;
                         }
                 }
+                block_len -= sent_len;
+                send_addr = addr;
         }
 
         if (need_zero_pkt) {
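
To make the data flow from the commit message concrete, here is a small userspace model of the OUT and IN bounce copies. This is only a sketch: memcpy() stands in for sg_pcopy_to_buffer()/sg_pcopy_from_buffer(), the DMA map/unmap steps are only noted in comments, and the helper names bounce_out()/bounce_in_complete() are illustrative, not driver API.

#include <stdio.h>
#include <string.h>

#define MAX_PKT 8                       /* stand-in for wMaxPacketSize */

static char bounce_buf[MAX_PKT];        /* models seg->bounce_buf */

/* OUT: copy the unalignable tail into the bounce buffer, then map and queue it */
static void bounce_out(const char *urb_buf, size_t offs, size_t len)
{
        memcpy(bounce_buf, urb_buf + offs, len);
        /* driver: dma_map_single(..., DMA_TO_DEVICE), point the last TRB here */
}

/* IN: after the TD completes, unmap and copy the received tail back to the URB */
static void bounce_in_complete(char *urb_buf, size_t offs, size_t len)
{
        /* driver: dma_unmap_single(..., DMA_FROM_DEVICE) happens first */
        memcpy(urb_buf + offs, bounce_buf, len);
}

int main(void)
{
        char out_buf[12] = "hello world";
        char in_buf[12] = { 0 };

        bounce_out(out_buf, 8, 3);              /* OUT: last 3 bytes bounced */

        memcpy(bounce_buf, "xyz", 3);           /* pretend the device wrote 3 bytes */
        bounce_in_complete(in_buf, 8, 3);       /* IN: copy back at offset 8 */
        printf("in_buf tail: %.3s\n", in_buf + 8);
        return 0;
}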
