
Commit 32d5bc8

niklas88 authored and joergroedel committed
iommu/dma: Allow a single FQ in addition to per-CPU FQs
In some virtualized environments, including s390 paged memory guests, IOTLB flushes are used to update IOMMU shadow tables. Due to this, they are much more expensive than in typical bare metal environments or non-paged s390 guests. In addition, they may parallelize poorly in virtualized environments. This changes the trade-off for flushing IOVAs such that minimizing the number of IOTLB flushes trumps any benefit of cheaper queuing operations or increased parallelism.

In this scenario per-CPU flush queues pose several problems. Firstly, per-CPU memory is often quite limited, prohibiting larger queues. Secondly, collecting IOVAs per-CPU but flushing via a global timeout reduces the number of IOVAs flushed per timeout, especially on s390 where PCI interrupts may not be bound to a specific CPU.

Let's introduce a single flush queue mode that reuses the same queue logic but only allocates a single global queue. This mode is selected by dma-iommu if a newly introduced .shadow_on_flush flag is set in struct dev_iommu. As a first user, the s390 IOMMU driver sets this flag during probe_device. With the unchanged small FQ size and timeouts this setting is worse than per-CPU queues, but a follow-up patch will make the FQ size and timeout variable. Together this allows the common IOVA flushing code to more closely resemble the global flush behavior used by s390's previous internal DMA API implementation.

Link: https://lore.kernel.org/all/[email protected]/
Acked-by: Robin Murphy <[email protected]>
Reviewed-by: Matthew Rosato <[email protected]> #s390
Signed-off-by: Niklas Schnelle <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
1 parent 53f8e9a commit 32d5bc8
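
The mechanism in a nutshell: an IOMMU driver whose IOTLB flushes synchronize shadow tables (and are therefore expensive) sets the new shadow_on_flush bit in struct dev_iommu from its probe_device callback; dma-iommu then selects IOMMU_DMA_OPTS_SINGLE_QUEUE instead of per-CPU flush queues when it initializes the flush queue for the domain. The sketch below mirrors the s390 hunk in this commit; my_iommu_probe_device, to_my_iommu() and needs_shadow_sync() are illustrative placeholders, not part of the patch.

/* Illustrative driver-side opt-in, modelled on the s390 hunk below. */
static struct iommu_device *my_iommu_probe_device(struct device *dev)
{
        struct my_iommu *miommu = to_my_iommu(dev);     /* hypothetical lookup */

        /*
         * IOTLB flushes on this platform are used to update shadow tables,
         * so prefer a single global flush queue over the default per-CPU
         * queues.
         */
        if (needs_shadow_sync(miommu))                  /* hypothetical predicate */
                dev->iommu->shadow_on_flush = 1;

        return &miommu->iommu_dev;
}

The flag only expresses a preference: iommu_dma_init_options() falls back to IOMMU_DMA_OPTS_PER_CPU_QUEUE when it is unset, and if flush queue initialization fails the domain simply falls back to strict mode, as before.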

File tree: 3 files changed (+134, -39 lines)


drivers/iommu/dma-iommu.c

Lines changed: 129 additions & 39 deletions
@@ -43,14 +43,26 @@ enum iommu_dma_cookie_type {
 	IOMMU_DMA_MSI_COOKIE,
 };
 
+enum iommu_dma_queue_type {
+	IOMMU_DMA_OPTS_PER_CPU_QUEUE,
+	IOMMU_DMA_OPTS_SINGLE_QUEUE,
+};
+
+struct iommu_dma_options {
+	enum iommu_dma_queue_type qt;
+};
+
 struct iommu_dma_cookie {
 	enum iommu_dma_cookie_type	type;
 	union {
 		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
 		struct {
 			struct iova_domain	iovad;
-
-			struct iova_fq __percpu *fq;	/* Flush queue */
+			/* Flush queue */
+			union {
+				struct iova_fq	*single_fq;
+				struct iova_fq	__percpu *percpu_fq;
+			};
 			/* Number of TLB flushes that have been started */
 			atomic64_t		fq_flush_start_cnt;
 			/* Number of TLB flushes that have been finished */

@@ -67,6 +79,8 @@ struct iommu_dma_cookie {
 
 	/* Domain for flush queue callback; NULL if flush queue not in use */
 	struct iommu_domain		*fq_domain;
+	/* Options for dma-iommu use */
+	struct iommu_dma_options	options;
 	struct mutex			mutex;
 };
 

@@ -124,7 +138,7 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
 	return idx;
 }
 
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
 {
 	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
 	unsigned int idx;

@@ -145,6 +159,15 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
 	}
 }
 
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fq->lock, flags);
+	fq_ring_free_locked(cookie, fq);
+	spin_unlock_irqrestore(&fq->lock, flags);
+}
+
 static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
 {
 	atomic64_inc(&cookie->fq_flush_start_cnt);

@@ -160,14 +183,11 @@ static void fq_flush_timeout(struct timer_list *t)
 	atomic_set(&cookie->fq_timer_on, 0);
 	fq_flush_iotlb(cookie);
 
-	for_each_possible_cpu(cpu) {
-		unsigned long flags;
-		struct iova_fq *fq;
-
-		fq = per_cpu_ptr(cookie->fq, cpu);
-		spin_lock_irqsave(&fq->lock, flags);
-		fq_ring_free(cookie, fq);
-		spin_unlock_irqrestore(&fq->lock, flags);
+	if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) {
+		fq_ring_free(cookie, cookie->single_fq);
+	} else {
+		for_each_possible_cpu(cpu)
+			fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu));
 	}
 }
 

@@ -188,19 +208,23 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 	 */
 	smp_mb();
 
-	fq = raw_cpu_ptr(cookie->fq);
+	if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		fq = cookie->single_fq;
+	else
+		fq = raw_cpu_ptr(cookie->percpu_fq);
+
 	spin_lock_irqsave(&fq->lock, flags);
 
 	/*
 	 * First remove all entries from the flush queue that have already been
 	 * flushed out on another CPU. This makes the fq_full() check below less
 	 * likely to be true.
 	 */
-	fq_ring_free(cookie, fq);
+	fq_ring_free_locked(cookie, fq);
 
 	if (fq_full(fq)) {
 		fq_flush_iotlb(cookie);
-		fq_ring_free(cookie, fq);
+		fq_ring_free_locked(cookie, fq);
 	}
 
 	idx = fq_ring_add(fq);

@@ -219,58 +243,105 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
 		  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 }
 
-static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+static void iommu_dma_free_fq_single(struct iova_fq *fq)
 {
-	int cpu, idx;
+	int idx;
 
-	if (!cookie->fq)
-		return;
+	fq_ring_for_each(idx, fq)
+		put_pages_list(&fq->entries[idx].freelist);
+	vfree(fq);
+}
+
+static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
+{
+	int cpu, idx;
 
-	del_timer_sync(&cookie->fq_timer);
 	/* The IOVAs will be torn down separately, so just free our queued pages */
 	for_each_possible_cpu(cpu) {
-		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+		struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
 
 		fq_ring_for_each(idx, fq)
 			put_pages_list(&fq->entries[idx].freelist);
 	}
 
-	free_percpu(cookie->fq);
+	free_percpu(percpu_fq);
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+	if (!cookie->fq_domain)
+		return;
+
+	del_timer_sync(&cookie->fq_timer);
+	if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		iommu_dma_free_fq_single(cookie->single_fq);
+	else
+		iommu_dma_free_fq_percpu(cookie->percpu_fq);
+}
+
+static void iommu_dma_init_one_fq(struct iova_fq *fq)
+{
+	int i;
+
+	fq->head = 0;
+	fq->tail = 0;
+
+	spin_lock_init(&fq->lock);
+
+	for (i = 0; i < IOVA_FQ_SIZE; i++)
+		INIT_LIST_HEAD(&fq->entries[i].freelist);
+}
+
+static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
+{
+	struct iova_fq *queue;
+
+	queue = vmalloc(sizeof(*queue));
+	if (!queue)
+		return -ENOMEM;
+	iommu_dma_init_one_fq(queue);
+	cookie->single_fq = queue;
+
+	return 0;
+}
+
+static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
+{
+	struct iova_fq __percpu *queue;
+	int cpu;
+
+	queue = alloc_percpu(struct iova_fq);
+	if (!queue)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu)
+		iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu));
+	cookie->percpu_fq = queue;
+	return 0;
 }
 
 /* sysfs updates are serialised by the mutex of the group owning @domain */
 int iommu_dma_init_fq(struct iommu_domain *domain)
 {
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	struct iova_fq __percpu *queue;
-	int i, cpu;
+	int rc;
 
 	if (cookie->fq_domain)
 		return 0;
 
 	atomic64_set(&cookie->fq_flush_start_cnt,  0);
 	atomic64_set(&cookie->fq_flush_finish_cnt, 0);
 
-	queue = alloc_percpu(struct iova_fq);
-	if (!queue) {
+	if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+		rc = iommu_dma_init_fq_single(cookie);
+	else
+		rc = iommu_dma_init_fq_percpu(cookie);
+
+	if (rc) {
 		pr_warn("iova flush queue initialization failed\n");
 		return -ENOMEM;
 	}
 
-	for_each_possible_cpu(cpu) {
-		struct iova_fq *fq = per_cpu_ptr(queue, cpu);
-
-		fq->head = 0;
-		fq->tail = 0;
-
-		spin_lock_init(&fq->lock);
-
-		for (i = 0; i < IOVA_FQ_SIZE; i++)
-			INIT_LIST_HEAD(&fq->entries[i].freelist);
-	}
-
-	cookie->fq = queue;
-
 	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
 	atomic_set(&cookie->fq_timer_on, 0);
 	/*

@@ -554,6 +625,23 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 	return false;
 }
 
+/**
+ * iommu_dma_init_options - Initialize dma-iommu options
+ * @options: The options to be initialized
+ * @dev: Device the options are set for
+ *
+ * This allows tuning dma-iommu specific to device properties
+ */
+static void iommu_dma_init_options(struct iommu_dma_options *options,
+				   struct device *dev)
+{
+	/* Shadowing IOTLB flushes do better with a single queue */
+	if (dev->iommu->shadow_on_flush)
+		options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE;
+	else
+		options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()

@@ -614,6 +702,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	if (ret)
 		goto done_unlock;
 
+	iommu_dma_init_options(&cookie->options, dev);
+
 	/* If the FQ fails we can simply fall back to strict mode */
 	if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
 	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))

drivers/iommu/s390-iommu.c

Lines changed: 3 additions & 0 deletions
@@ -463,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
 		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
 
+	if (zdev->tlb_refresh)
+		dev->iommu->shadow_on_flush = 1;
+
 	return &zdev->iommu_dev;
 }
 

include/linux/iommu.h

Lines changed: 2 additions & 0 deletions
@@ -424,6 +424,7 @@ struct iommu_fault_param {
  * @attach_deferred: the dma domain attachment is deferred
  * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
  * @require_direct: device requires IOMMU_RESV_DIRECT regions
+ * @shadow_on_flush: IOTLB flushes are used to sync shadow tables
  *
  * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
  *	struct iommu_group	*iommu_group;

@@ -439,6 +440,7 @@ struct dev_iommu {
 	u32				attach_deferred:1;
 	u32				pci_32bit_workaround:1;
 	u32				require_direct:1;
+	u32				shadow_on_flush:1;
 };
 
 int iommu_device_register(struct iommu_device *iommu,
