
Commit 9f5b681

niklas88 authored and joergroedel committed
iommu/dma: Use a large flush queue and timeout for shadow_on_flush
Flush queues currently use a fixed compile-time size of 256 entries. This being a power of 2 allows the compiler to use shift and mask instead of more expensive modulo operations. With per-CPU flush queues, larger queue sizes would hit per-CPU allocation limits; with a single flush queue, however, these limits do not apply. Also, since single queues are particularly suitable for virtualized environments with expensive IOTLB flushes, they benefit especially from larger queues and thus fewer flushes.

To this end, re-order struct iova_fq so we can use a dynamic array, and introduce the flush queue size and timeout as new options in the iommu_dma_options struct. So as not to lose the shift-and-mask optimization, use a power of 2 for the length and use an explicit mask instead of letting the compiler optimize this.

A large queue size and a 1 second timeout are then set for the shadow_on_flush case used by s390 paged memory guests. This brings performance on par with the previous s390-specific DMA API implementation.

Acked-by: Robin Murphy <[email protected]>
Reviewed-by: Matthew Rosato <[email protected]> #s390
Signed-off-by: Niklas Schnelle <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
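Purely as an illustration of the size/mask trick described above (not code from this patch; all names here are hypothetical), a minimal user-space C sketch showing that, for a power-of-two queue length, wrapping an index with a bitwise AND against size - 1 gives the same result as the modulo the compiler could previously fold away at compile time:

/* Illustrative sketch only: mask-based wrap for a power-of-two ring size. */
#include <assert.h>
#include <stdio.h>

#define QUEUE_SIZE 256u /* must be a power of two */

static unsigned int wrap(unsigned int idx, unsigned int mod_mask)
{
        return idx & mod_mask; /* equivalent to idx % (mod_mask + 1) */
}

int main(void)
{
        unsigned int mod_mask = QUEUE_SIZE - 1;
        unsigned int i;

        /* The mask form and the modulo form agree for every index. */
        for (i = 0; i < 4 * QUEUE_SIZE; i++)
                assert(wrap(i, mod_mask) == i % QUEUE_SIZE);

        printf("mask-based wrap matches %% %u for all tested indices\n", QUEUE_SIZE);
        return 0;
}

The point of keeping the mask explicit is that once the queue length becomes a runtime value stored in iommu_dma_options, the compiler can no longer turn a modulo into an AND by itself, so the code precomputes mod_mask once per queue.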
1 parent 32d5bc8 commit 9f5b681

File tree

1 file changed: +32 −18 lines


drivers/iommu/dma-iommu.c

Lines changed: 32 additions & 18 deletions
@@ -50,6 +50,8 @@ enum iommu_dma_queue_type {
 
 struct iommu_dma_options {
         enum iommu_dma_queue_type qt;
+        size_t fq_size;
+        unsigned int fq_timeout;
 };
 
 struct iommu_dma_cookie {
@@ -98,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str)
 early_param("iommu.forcedac", iommu_dma_forcedac_setup);
 
 /* Number of entries per flush queue */
-#define IOVA_FQ_SIZE 256
+#define IOVA_DEFAULT_FQ_SIZE 256
+#define IOVA_SINGLE_FQ_SIZE 32768
 
 /* Timeout (in ms) after which entries are flushed from the queue */
-#define IOVA_FQ_TIMEOUT 10
+#define IOVA_DEFAULT_FQ_TIMEOUT 10
+#define IOVA_SINGLE_FQ_TIMEOUT 1000
 
 /* Flush queue entry for deferred flushing */
 struct iova_fq_entry {
@@ -113,18 +117,19 @@ struct iova_fq_entry {
 
 /* Per-CPU flush queue structure */
 struct iova_fq {
-        struct iova_fq_entry entries[IOVA_FQ_SIZE];
-        unsigned int head, tail;
         spinlock_t lock;
+        unsigned int head, tail;
+        unsigned int mod_mask;
+        struct iova_fq_entry entries[];
 };
 
 #define fq_ring_for_each(i, fq) \
-        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+        for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask)
 
 static inline bool fq_full(struct iova_fq *fq)
 {
         assert_spin_locked(&fq->lock);
-        return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+        return (((fq->tail + 1) & fq->mod_mask) == fq->head);
 }
 
 static inline unsigned int fq_ring_add(struct iova_fq *fq)
@@ -133,7 +138,7 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
 
         assert_spin_locked(&fq->lock);
 
-        fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+        fq->tail = (idx + 1) & fq->mod_mask;
 
         return idx;
 }
@@ -155,7 +160,7 @@ static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq
                                fq->entries[idx].iova_pfn,
                                fq->entries[idx].pages);
 
-                fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
+                fq->head = (fq->head + 1) & fq->mod_mask;
         }
 }
 
@@ -240,7 +245,7 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
         if (!atomic_read(&cookie->fq_timer_on) &&
             !atomic_xchg(&cookie->fq_timer_on, 1))
                 mod_timer(&cookie->fq_timer,
-                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+                          jiffies + msecs_to_jiffies(cookie->options.fq_timeout));
 }
 
 static void iommu_dma_free_fq_single(struct iova_fq *fq)
@@ -279,43 +284,47 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
                 iommu_dma_free_fq_percpu(cookie->percpu_fq);
 }
 
-static void iommu_dma_init_one_fq(struct iova_fq *fq)
+static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
 {
         int i;
 
         fq->head = 0;
         fq->tail = 0;
+        fq->mod_mask = fq_size - 1;
 
         spin_lock_init(&fq->lock);
 
-        for (i = 0; i < IOVA_FQ_SIZE; i++)
+        for (i = 0; i < fq_size; i++)
                 INIT_LIST_HEAD(&fq->entries[i].freelist);
 }
 
 static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
 {
+        size_t fq_size = cookie->options.fq_size;
         struct iova_fq *queue;
 
-        queue = vmalloc(sizeof(*queue));
+        queue = vmalloc(struct_size(queue, entries, fq_size));
         if (!queue)
                 return -ENOMEM;
-        iommu_dma_init_one_fq(queue);
+        iommu_dma_init_one_fq(queue, fq_size);
         cookie->single_fq = queue;
 
         return 0;
 }
 
 static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
 {
+        size_t fq_size = cookie->options.fq_size;
         struct iova_fq __percpu *queue;
         int cpu;
 
-        queue = alloc_percpu(struct iova_fq);
+        queue = __alloc_percpu(struct_size(queue, entries, fq_size),
+                               __alignof__(*queue));
         if (!queue)
                 return -ENOMEM;
 
         for_each_possible_cpu(cpu)
-                iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu));
+                iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size);
         cookie->percpu_fq = queue;
         return 0;
 }
@@ -635,11 +644,16 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
 static void iommu_dma_init_options(struct iommu_dma_options *options,
                                    struct device *dev)
 {
-        /* Shadowing IOTLB flushes do better with a single queue */
-        if (dev->iommu->shadow_on_flush)
+        /* Shadowing IOTLB flushes do better with a single large queue */
+        if (dev->iommu->shadow_on_flush) {
                 options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE;
-        else
+                options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
+                options->fq_size = IOVA_SINGLE_FQ_SIZE;
+        } else {
                 options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
+                options->fq_size = IOVA_DEFAULT_FQ_SIZE;
+                options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
+        }
 }
 
 /**
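As a companion sketch to the diff above, the following hypothetical user-space C shows the flexible-array-member pattern struct iova_fq switches to, with the entry array sized at allocation time. All names here are made up, malloc() stands in for vmalloc()/__alloc_percpu(), and plain sizeof arithmetic stands in for the kernel's overflow-checked struct_size() helper:

/* Hypothetical sketch of a dynamically sized flush-queue-like structure. */
#include <stdlib.h>
#include <string.h>

struct entry {
        unsigned long iova_pfn;
        size_t pages;
};

struct queue {
        unsigned int head, tail;
        unsigned int mod_mask;
        struct entry entries[]; /* flexible array member, sized at allocation time */
};

static struct queue *queue_alloc(size_t nr_entries)
{
        /* nr_entries is assumed to be a power of two so indices can wrap with a mask. */
        size_t bytes = sizeof(struct queue) + nr_entries * sizeof(struct entry);
        struct queue *q = malloc(bytes);

        if (!q)
                return NULL;
        memset(q, 0, bytes);
        q->mod_mask = (unsigned int)nr_entries - 1;
        return q;
}

Putting the fixed-size members (lock, head, tail, mod_mask) first and the array last is what makes the single allocation of header plus entries possible.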
