Skip to content

Commit 6565439

Browse files
basuamd authored and vinodkoul committed
dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality
Unlike PTDMA, which has a single queue, AE4DMA offers multi-channel functionality. Utilize its multi-queue support, which allows higher speeds than PTDMA, to achieve higher performance using the AE4DMA workqueue-based mechanism.

Fixes: 69a47b1 ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
Signed-off-by: Basavaraj Natikar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Vinod Koul <[email protected]>
1 parent feba04e commit 6565439

File tree

2 files changed

+89
-3
lines changed

2 files changed

+89
-3
lines changed

drivers/dma/amd/ae4dma/ae4dma.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#define AE4_DMA_VERSION 4
3838
#define CMD_AE4_DESC_DW0_VAL 2
3939

40+
/* Timeout in milliseconds; callers convert it with msecs_to_jiffies(). */
#define AE4_TIME_OUT 5000
41+
4042
struct ae4_msix {
4143
int msix_count;
4244
struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];

drivers/dma/amd/ptdma/ptdma-dmaengine.c

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
198198
{
199199
struct dma_async_tx_descriptor *tx_desc;
200200
struct virt_dma_desc *vd;
201+
struct pt_device *pt;
201202
unsigned long flags;
202203

204+
pt = chan->pt;
203205
/* Loop over descriptors until one is found with commands */
204206
do {
205207
if (desc) {
@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
217219

218220
spin_lock_irqsave(&chan->vc.lock, flags);
219221

220-
if (desc) {
222+
if (pt->ver != AE4_DMA_VERSION && desc) {
221223
if (desc->status != DMA_COMPLETE) {
222224
if (desc->status != DMA_ERROR)
223225
desc->status = DMA_COMPLETE;
@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
235237

236238
spin_unlock_irqrestore(&chan->vc.lock, flags);
237239

238-
if (tx_desc) {
240+
if (pt->ver != AE4_DMA_VERSION && tx_desc) {
239241
dmaengine_desc_get_callback_invoke(tx_desc, NULL);
240242
dma_run_dependencies(tx_desc);
241243
vchan_vdesc_fini(vd);
@@ -245,23 +247,58 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
245247
return NULL;
246248
}
247249

250+
static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
251+
{
252+
u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
253+
u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
254+
255+
if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
256+
return true;
257+
258+
return false;
259+
}
260+
248261
static void pt_cmd_callback(void *data, int err)
249262
{
250263
struct pt_dma_desc *desc = data;
264+
struct ae4_cmd_queue *ae4cmd_q;
251265
struct dma_chan *dma_chan;
252266
struct pt_dma_chan *chan;
267+
struct ae4_device *ae4;
268+
struct pt_device *pt;
253269
int ret;
254270

255271
if (err == -EINPROGRESS)
256272
return;
257273

258274
dma_chan = desc->vd.tx.chan;
259275
chan = to_pt_chan(dma_chan);
276+
pt = chan->pt;
260277

261278
if (err)
262279
desc->status = DMA_ERROR;
263280

264281
while (true) {
282+
if (pt->ver == AE4_DMA_VERSION) {
283+
ae4 = container_of(pt, struct ae4_device, pt);
284+
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
285+
286+
if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
287+
ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
288+
wake_up(&ae4cmd_q->q_w);
289+
290+
if (wait_for_completion_timeout(&ae4cmd_q->cmp,
291+
msecs_to_jiffies(AE4_TIME_OUT))
292+
== 0) {
293+
dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
294+
break;
295+
}
296+
297+
reinit_completion(&ae4cmd_q->cmp);
298+
continue;
299+
}
300+
}
301+
265302
/* Check for DMA descriptor completion */
266303
desc = pt_handle_active_desc(chan, desc);
267304

@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
296333
return desc;
297334
}
298335

336+
static void pt_cmd_callback_work(void *data, int err)
337+
{
338+
struct dma_async_tx_descriptor *tx_desc;
339+
struct pt_dma_desc *desc = data;
340+
struct dma_chan *dma_chan;
341+
struct virt_dma_desc *vd;
342+
struct pt_dma_chan *chan;
343+
unsigned long flags;
344+
345+
dma_chan = desc->vd.tx.chan;
346+
chan = to_pt_chan(dma_chan);
347+
348+
if (err == -EINPROGRESS)
349+
return;
350+
351+
tx_desc = &desc->vd.tx;
352+
vd = &desc->vd;
353+
354+
if (err)
355+
desc->status = DMA_ERROR;
356+
357+
spin_lock_irqsave(&chan->vc.lock, flags);
358+
if (desc) {
359+
if (desc->status != DMA_COMPLETE) {
360+
if (desc->status != DMA_ERROR)
361+
desc->status = DMA_COMPLETE;
362+
363+
dma_cookie_complete(tx_desc);
364+
dma_descriptor_unmap(tx_desc);
365+
} else {
366+
tx_desc = NULL;
367+
}
368+
}
369+
spin_unlock_irqrestore(&chan->vc.lock, flags);
370+
371+
if (tx_desc) {
372+
dmaengine_desc_get_callback_invoke(tx_desc, NULL);
373+
dma_run_dependencies(tx_desc);
374+
list_del(&desc->vd.node);
375+
vchan_vdesc_fini(vd);
376+
}
377+
}
378+
299379
static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
300380
dma_addr_t dst,
301381
dma_addr_t src,
@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
327407
desc->len = len;
328408

329409
if (pt->ver == AE4_DMA_VERSION) {
410+
pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
330411
ae4 = container_of(pt, struct ae4_device, pt);
331412
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
332413
mutex_lock(&ae4cmd_q->cmd_lock);
@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
367448
{
368449
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
369450
struct pt_dma_desc *desc;
451+
struct pt_device *pt;
370452
unsigned long flags;
371453
bool engine_is_idle = true;
372454

455+
pt = chan->pt;
456+
373457
spin_lock_irqsave(&chan->vc.lock, flags);
374458

375459
desc = pt_next_dma_desc(chan);
376-
if (desc)
460+
if (desc && pt->ver != AE4_DMA_VERSION)
377461
engine_is_idle = false;
378462

379463
vchan_issue_pending(&chan->vc);

0 commit comments

Comments
 (0)