Skip to content

Commit 4529b57

Browse files
Andi Kleen authored and Kuppuswamy Sathyanarayanan committed
swiotlb: Split up single swiotlb lock
Traditionally swiotlb was not performance critical because it was only used for slow devices. But in some setups, like TDX confidential guests, all IO has to go through swiotlb. Currently swiotlb only has a single lock. Under high IO load with multiple CPUs this can lead to significant lock contention on the swiotlb lock. We've seen 20+% CPU time in locks in some extreme cases. This patch splits the swiotlb into individual areas which have their own lock. Each CPU tries to allocate in its own area first. Only if that fails does it search other areas. On freeing the allocation is freed into the area where the memory was originally allocated from. To avoid doing a full modulo in the main path the number of swiotlb areas is always rounded to the next power of two. I believe that's not really needed anymore on modern CPUs (which have fast enough dividers), but still a good idea on older parts. The number of areas can be set using the swiotlb option. But to avoid every user having to set this option set the default to the number of available CPUs. Unfortunately on x86 swiotlb is initialized before num_possible_cpus() is available, that is why it uses a custom hook called from the early ACPI code. Signed-off-by: Andi Kleen <[email protected]>
1 parent feff673 commit 4529b57

File tree

4 files changed

+183
-29
lines changed

4 files changed

+183
-29
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5569,8 +5569,10 @@
55695569
it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
55705570

55715571
swiotlb= [ARM,IA-64,PPC,MIPS,X86]
5572-
Format: { <int> | force | noforce }
5572+
Format: { <int> [,<int>] | force | noforce }
55735573
<int> -- Number of I/O TLB slabs
5574+
<int> -- Second integer after comma. Number of swiotlb
5575+
areas with their own lock. Must be power of 2.
55745576
force -- force using of bounce buffers even if they
55755577
wouldn't be automatically used by the kernel
55765578
noforce -- Never use bounce buffers (for debugging)

arch/x86/kernel/acpi/boot.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <linux/efi-bgrt.h>
2323
#include <linux/serial_core.h>
2424
#include <linux/pgtable.h>
25+
#include <linux/swiotlb.h>
2526

2627
#include <asm/e820/api.h>
2728
#include <asm/irqdomain.h>
@@ -1129,6 +1130,9 @@ static int __init acpi_parse_madt_lapic_entries(void)
11291130
return count;
11301131
}
11311132

1133+
/* This does not take overrides into consideration */
1134+
swiotlb_hint_cpus(max(count, x2count));
1135+
11321136
x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
11331137
acpi_parse_x2apic_nmi, 0);
11341138
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,

include/linux/swiotlb.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ enum swiotlb_force {
3838

3939
extern void swiotlb_init(int verbose);
4040
int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
41+
void swiotlb_hint_cpus(int cpus);
4142
unsigned long swiotlb_size_or_default(void);
4243
extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
4344
extern int swiotlb_late_init_with_default_size(size_t default_size);
@@ -70,7 +71,25 @@ struct io_tlb_slot {
7071
};
7172

7273
/**
73-
* struct io_tlb_mem - IO TLB Memory Pool Descriptor
74+
* struct io_tlb_area - IO TLB memory area descriptor
75+
*
76+
* This is a single area with a single lock.
77+
*
78+
* @used: The number of used IO TLB block.
79+
* @list: The free list describing the number of free entries available
80+
* from each index.
81+
* @lock: The lock to protect the above data structures in the map and
82+
* unmap calls.
83+
*/
84+
85+
struct io_tlb_area {
86+
unsigned long used;
87+
struct list_head free_slots;
88+
spinlock_t lock;
89+
};
90+
91+
/**
92+
* struct io_tlb_mem - io tlb memory pool descriptor
7493
*
7594
* @start: The start address of the swiotlb memory pool. Used to do a quick
7695
* range check to see if the memory was in fact allocated by this
@@ -85,8 +104,6 @@ struct io_tlb_slot {
85104
* @index: The index to start searching in the next round.
86105
* @orig_addr: The original address corresponding to a mapped entry.
87106
* @alloc_size: Size of the allocated buffer.
88-
* @lock: The lock to protect the above data structures in the map and
89-
* unmap calls.
90107
* @debugfs: The dentry to debugfs.
91108
* @late_alloc: %true if allocated using the page allocator
92109
* @force_bounce: %true if swiotlb bouncing is forced
@@ -98,13 +115,11 @@ struct io_tlb_mem {
98115
phys_addr_t start;
99116
phys_addr_t end;
100117
unsigned long nslabs;
101-
unsigned long used;
102-
struct list_head free_slots;
103-
spinlock_t lock;
104118
struct dentry *debugfs;
105119
bool late_alloc;
106120
bool force_bounce;
107121
bool for_alloc;
122+
struct io_tlb_area *areas;
108123
struct io_tlb_slot *slots;
109124
unsigned long *bitmap;
110125
};

0 commit comments

Comments
 (0)