Skip to content

Commit 90a30e2

Browse files
basuamd authored and vinodkoul committed
dmaengine: ae4dma: Add AMD ae4dma controller driver
Add support for the AMD AE4DMA controller. It performs high-bandwidth memory-to-memory and IO copy operations. Device commands are managed via a circular queue of 'descriptors', each of which specifies source and destination addresses for copying a single buffer of data.

Reviewed-by: Raju Rangoju <[email protected]>
Reviewed-by: Philipp Stanner <[email protected]>
Signed-off-by: Basavaraj Natikar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Vinod Koul <[email protected]>
1 parent e01ee7c commit 90a30e2

File tree

8 files changed

+485
-0
lines changed

8 files changed

+485
-0
lines changed

MAINTAINERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,12 @@ L: [email protected]
980980
S: Supported
981981
F: drivers/ras/amd/atl/*
982982

983+
AMD AE4DMA DRIVER
984+
M: Basavaraj Natikar <[email protected]>
985+
986+
S: Supported
987+
F: drivers/dma/amd/ae4dma/
988+
983989
AMD AXI W1 DRIVER
984990
M: Kris Chaplin <[email protected]>
985991
R: Thomas Delev <[email protected]>

drivers/dma/amd/Kconfig

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,20 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
#
3+
4+
config AMD_AE4DMA
5+
tristate "AMD AE4DMA Engine"
6+
depends on (X86_64 || COMPILE_TEST) && PCI
7+
depends on AMD_PTDMA
8+
select DMA_ENGINE
9+
select DMA_VIRTUAL_CHANNELS
10+
help
11+
Enable support for the AMD AE4DMA controller. This controller
12+
provides DMA capabilities to perform high bandwidth memory to
13+
memory and IO copy operations. It performs DMA transfer through
14+
queue-based descriptor management. This DMA controller is intended
15+
to be used with AMD Non-Transparent Bridge devices and not for
16+
general purpose peripheral DMA.
17+
318
config AMD_PTDMA
419
tristate "AMD PassThru DMA Engine"
520
depends on X86_64 && PCI

drivers/dma/amd/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# SPDX-License-Identifier: GPL-2.0
22

3+
obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
34
obj-$(CONFIG_AMD_PTDMA) += ptdma/
45
obj-$(CONFIG_AMD_QDMA) += qdma/

drivers/dma/amd/ae4dma/Makefile

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# SPDX-License-Identifier: GPL-2.0
2+
#
3+
# AMD AE4DMA driver
4+
#
5+
6+
obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
7+
8+
ae4dma-objs := ae4dma-dev.o
9+
10+
ae4dma-$(CONFIG_PCI) += ae4dma-pci.o

drivers/dma/amd/ae4dma/ae4dma-dev.c

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* AMD AE4DMA driver
4+
*
5+
* Copyright (c) 2024, Advanced Micro Devices, Inc.
6+
* All Rights Reserved.
7+
*
8+
* Author: Basavaraj Natikar <[email protected]>
9+
*/
10+
11+
#include "ae4dma.h"
12+
13+
static unsigned int max_hw_q = 1;
14+
module_param(max_hw_q, uint, 0444);
15+
MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
16+
17+
static void ae4_pending_work(struct work_struct *work)
18+
{
19+
struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
20+
struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
21+
struct pt_cmd *cmd;
22+
u32 cridx;
23+
24+
for (;;) {
25+
wait_event_interruptible(ae4cmd_q->q_w,
26+
((atomic64_read(&ae4cmd_q->done_cnt)) <
27+
atomic64_read(&ae4cmd_q->intr_cnt)));
28+
29+
atomic64_inc(&ae4cmd_q->done_cnt);
30+
31+
mutex_lock(&ae4cmd_q->cmd_lock);
32+
cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
33+
while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
34+
cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
35+
list_del(&cmd->entry);
36+
37+
ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx);
38+
cmd->pt_cmd_callback(cmd->data, cmd->ret);
39+
40+
ae4cmd_q->q_cmd_count--;
41+
ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
42+
43+
complete_all(&ae4cmd_q->cmp);
44+
}
45+
mutex_unlock(&ae4cmd_q->cmd_lock);
46+
}
47+
}
48+
49+
static irqreturn_t ae4_core_irq_handler(int irq, void *data)
50+
{
51+
struct ae4_cmd_queue *ae4cmd_q = data;
52+
struct pt_cmd_queue *cmd_q;
53+
struct pt_device *pt;
54+
u32 status;
55+
56+
cmd_q = &ae4cmd_q->cmd_q;
57+
pt = cmd_q->pt;
58+
59+
pt->total_interrupts++;
60+
atomic64_inc(&ae4cmd_q->intr_cnt);
61+
62+
status = readl(cmd_q->reg_control + AE4_INTR_STS_OFF);
63+
if (status & BIT(0)) {
64+
status &= GENMASK(31, 1);
65+
writel(status, cmd_q->reg_control + AE4_INTR_STS_OFF);
66+
}
67+
68+
wake_up(&ae4cmd_q->q_w);
69+
70+
return IRQ_HANDLED;
71+
}
72+
73+
void ae4_destroy_work(struct ae4_device *ae4)
74+
{
75+
struct ae4_cmd_queue *ae4cmd_q;
76+
int i;
77+
78+
for (i = 0; i < ae4->cmd_q_count; i++) {
79+
ae4cmd_q = &ae4->ae4cmd_q[i];
80+
81+
if (!ae4cmd_q->pws)
82+
break;
83+
84+
cancel_delayed_work_sync(&ae4cmd_q->p_work);
85+
destroy_workqueue(ae4cmd_q->pws);
86+
}
87+
}
88+
89+
int ae4_core_init(struct ae4_device *ae4)
90+
{
91+
struct pt_device *pt = &ae4->pt;
92+
struct ae4_cmd_queue *ae4cmd_q;
93+
struct device *dev = pt->dev;
94+
struct pt_cmd_queue *cmd_q;
95+
int i, ret = 0;
96+
97+
writel(max_hw_q, pt->io_regs);
98+
99+
for (i = 0; i < max_hw_q; i++) {
100+
ae4cmd_q = &ae4->ae4cmd_q[i];
101+
ae4cmd_q->id = ae4->cmd_q_count;
102+
ae4->cmd_q_count++;
103+
104+
cmd_q = &ae4cmd_q->cmd_q;
105+
cmd_q->pt = pt;
106+
107+
cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
108+
109+
ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
110+
dev_name(pt->dev), ae4cmd_q);
111+
if (ret)
112+
return ret;
113+
114+
cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
115+
116+
cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
117+
GFP_KERNEL);
118+
if (!cmd_q->qbase)
119+
return -ENOMEM;
120+
}
121+
122+
for (i = 0; i < ae4->cmd_q_count; i++) {
123+
ae4cmd_q = &ae4->ae4cmd_q[i];
124+
125+
cmd_q = &ae4cmd_q->cmd_q;
126+
127+
cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
128+
129+
/* Update the device registers with queue information. */
130+
writel(CMD_Q_LEN, cmd_q->reg_control + AE4_MAX_IDX_OFF);
131+
132+
cmd_q->qdma_tail = cmd_q->qbase_dma;
133+
writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_L_OFF);
134+
writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_H_OFF);
135+
136+
INIT_LIST_HEAD(&ae4cmd_q->cmd);
137+
init_waitqueue_head(&ae4cmd_q->q_w);
138+
139+
ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
140+
if (!ae4cmd_q->pws) {
141+
ae4_destroy_work(ae4);
142+
return -ENOMEM;
143+
}
144+
INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
145+
queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, usecs_to_jiffies(100));
146+
147+
init_completion(&ae4cmd_q->cmp);
148+
}
149+
150+
return ret;
151+
}

drivers/dma/amd/ae4dma/ae4dma-pci.c

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* AMD AE4DMA driver
4+
*
5+
* Copyright (c) 2024, Advanced Micro Devices, Inc.
6+
* All Rights Reserved.
7+
*
8+
* Author: Basavaraj Natikar <[email protected]>
9+
*/
10+
11+
#include "ae4dma.h"
12+
13+
static int ae4_get_irqs(struct ae4_device *ae4)
14+
{
15+
struct ae4_msix *ae4_msix = ae4->ae4_msix;
16+
struct pt_device *pt = &ae4->pt;
17+
struct device *dev = pt->dev;
18+
struct pci_dev *pdev;
19+
int i, v, ret;
20+
21+
pdev = to_pci_dev(dev);
22+
23+
for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
24+
ae4_msix->msix_entry[v].entry = v;
25+
26+
ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
27+
if (ret != v) {
28+
if (ret > 0)
29+
pci_free_irq_vectors(pdev);
30+
31+
dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
32+
ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
33+
if (ret < 0) {
34+
dev_err(dev, "could not enable MSI (%d)\n", ret);
35+
return ret;
36+
}
37+
38+
ret = pci_irq_vector(pdev, 0);
39+
if (ret < 0) {
40+
pci_free_irq_vectors(pdev);
41+
return ret;
42+
}
43+
44+
for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
45+
ae4->ae4_irq[i] = ret;
46+
47+
} else {
48+
ae4_msix->msix_count = ret;
49+
for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
50+
ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
51+
}
52+
53+
return ret;
54+
}
55+
56+
static void ae4_free_irqs(struct ae4_device *ae4)
57+
{
58+
struct ae4_msix *ae4_msix = ae4->ae4_msix;
59+
struct pt_device *pt = &ae4->pt;
60+
struct device *dev = pt->dev;
61+
struct pci_dev *pdev;
62+
63+
pdev = to_pci_dev(dev);
64+
65+
if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
66+
pci_free_irq_vectors(pdev);
67+
}
68+
69+
static void ae4_deinit(struct ae4_device *ae4)
70+
{
71+
ae4_free_irqs(ae4);
72+
}
73+
74+
static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
75+
{
76+
struct device *dev = &pdev->dev;
77+
struct ae4_device *ae4;
78+
struct pt_device *pt;
79+
int bar_mask;
80+
int ret = 0;
81+
82+
ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
83+
if (!ae4)
84+
return -ENOMEM;
85+
86+
ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
87+
if (!ae4->ae4_msix)
88+
return -ENOMEM;
89+
90+
ret = pcim_enable_device(pdev);
91+
if (ret)
92+
goto ae4_error;
93+
94+
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
95+
ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
96+
if (ret)
97+
goto ae4_error;
98+
99+
pt = &ae4->pt;
100+
pt->dev = dev;
101+
102+
pt->io_regs = pcim_iomap_table(pdev)[0];
103+
if (!pt->io_regs) {
104+
ret = -ENOMEM;
105+
goto ae4_error;
106+
}
107+
108+
ret = ae4_get_irqs(ae4);
109+
if (ret < 0)
110+
goto ae4_error;
111+
112+
pci_set_master(pdev);
113+
114+
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
115+
116+
dev_set_drvdata(dev, ae4);
117+
118+
ret = ae4_core_init(ae4);
119+
if (ret)
120+
goto ae4_error;
121+
122+
return 0;
123+
124+
ae4_error:
125+
ae4_deinit(ae4);
126+
127+
return ret;
128+
}
129+
130+
static void ae4_pci_remove(struct pci_dev *pdev)
131+
{
132+
struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
133+
134+
ae4_destroy_work(ae4);
135+
ae4_deinit(ae4);
136+
}
137+
138+
static const struct pci_device_id ae4_pci_table[] = {
139+
{ PCI_VDEVICE(AMD, 0x14C8), },
140+
{ PCI_VDEVICE(AMD, 0x14DC), },
141+
{ PCI_VDEVICE(AMD, 0x149B), },
142+
/* Last entry must be zero */
143+
{ 0, }
144+
};
145+
MODULE_DEVICE_TABLE(pci, ae4_pci_table);
146+
147+
static struct pci_driver ae4_pci_driver = {
148+
.name = "ae4dma",
149+
.id_table = ae4_pci_table,
150+
.probe = ae4_pci_probe,
151+
.remove = ae4_pci_remove,
152+
};
153+
154+
module_pci_driver(ae4_pci_driver);
155+
156+
MODULE_LICENSE("GPL");
157+
MODULE_DESCRIPTION("AMD AE4DMA driver");

0 commit comments

Comments
 (0)