Skip to content

Commit c4211f4

Browse files
Alex Shi authored and H. Peter Anvin committed
x86/tlb: add tlb_flushall_shift for specific CPU
Testing shows that different CPU types (microarchitectures and NUMA modes) have different balance points between a full TLB flush and multiple invlpg instructions, and there are also cases where the TLB flush change does not help at all. This patch provides an interface so that x86 vendor developers can set a different shift for each CPU type. For example, on machines at hand the balance point is 16 entries on Romley-EP, 8 entries on Bloomfield NHM-EP, and 256 on an IVB mobile CPU, while on a model 15 Core2 Xeon using invlpg does not help at all. For untested machines, a conservative optimization is applied, the same as for NHM CPUs. Signed-off-by: Alex Shi <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: H. Peter Anvin <[email protected]>
1 parent d8dfe60 commit c4211f4

File tree

5 files changed

+53
-7
lines changed

5 files changed

+53
-7
lines changed

arch/x86/include/asm/processor.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO];
7272
extern u16 __read_mostly tlb_lld_4k[NR_INFO];
7373
extern u16 __read_mostly tlb_lld_2m[NR_INFO];
7474
extern u16 __read_mostly tlb_lld_4m[NR_INFO];
75+
extern s8 __read_mostly tlb_flushall_shift;
76+
7577
/*
7678
* CPU type and hardware bug flags. Kept separately for each CPU.
7779
* Members of this structure are referenced in head.S, so think twice

arch/x86/kernel/cpu/common.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,16 +459,26 @@ u16 __read_mostly tlb_lld_4k[NR_INFO];
459459
u16 __read_mostly tlb_lld_2m[NR_INFO];
460460
u16 __read_mostly tlb_lld_4m[NR_INFO];
461461

462+
/*
463+
* tlb_flushall_shift shows the balance point in replacing cr3 write
464+
* with multiple 'invlpg'. It will do this replacement when
465+
* flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
466+
* If tlb_flushall_shift is -1, means the replacement will be disabled.
467+
*/
468+
s8 __read_mostly tlb_flushall_shift = -1;
469+
462470
/*
 * Let the vendor hook populate the per-CPU TLB size tables and the
 * flush-all balance shift, then report what was detected.
 * NOTE(review): reconstructed from a diff hunk — the printk strings are
 * kept byte-for-byte with the post-commit version.
 */
void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
{
	if (this_cpu->c_detect_tlb)
		this_cpu->c_detect_tlb(c);

	printk(KERN_INFO
		"Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
		"tlb_flushall_shift is 0x%x\n",
		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
		tlb_flushall_shift);
}
473483

474484
void __cpuinit detect_ht(struct cpuinfo_x86 *c)

arch/x86/kernel/cpu/intel.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,39 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
610610
}
611611
}
612612

613+
/*
 * Choose the tlb_flushall_shift balance point for this Intel CPU.
 *
 * A value of -1 disables the invlpg-based range flush entirely (a full
 * cr3 reload is always used); otherwise flush_tlb_range() replaces the
 * full flush with multiple invlpg when
 * flush_tlb_lines <= active_lines / 2^tlb_flushall_shift.
 * The per-model values come from benchmarking; unknown models get the
 * conservative Nehalem setting.
 */
static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
{
	int fam_model;

	/* Without invlpg there is nothing to balance — always flush all. */
	if (!cpu_has_invlpg) {
		tlb_flushall_shift = -1;
		return;
	}

	fam_model = (c->x86 << 8) + c->x86_model;

	switch (fam_model) {
	case 0x60f:	/* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
	case 0x616:	/* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 0x617:	/* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 0x61d:	/* six-core 45 nm xeon "Dunnington" */
		/* invlpg was measured to never win on these parts. */
		tlb_flushall_shift = -1;
		break;
	case 0x61a:	/* 45 nm nehalem, "Bloomfield" */
	case 0x61e:	/* 45 nm nehalem, "Lynnfield" */
	case 0x625:	/* 32 nm nehalem, "Clarkdale" */
	case 0x62c:	/* 32 nm nehalem, "Gulftown" */
	case 0x62e:	/* 45 nm nehalem-ex, "Beckton" */
	case 0x62f:	/* 32 nm Xeon E7 */
		tlb_flushall_shift = 6;
		break;
	case 0x62a:	/* SandyBridge */
	case 0x62d:	/* SandyBridge, "Romely-EP" */
		tlb_flushall_shift = 5;
		break;
	case 0x63a:	/* Ivybridge */
		tlb_flushall_shift = 1;
		break;
	default:
		/* Untested model: conservative default, same as Nehalem. */
		tlb_flushall_shift = 6;
	}
}
645+
613646
static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
614647
{
615648
int i, j, n;
@@ -630,6 +663,7 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
630663
for (j = 1 ; j < 16 ; j++)
631664
intel_tlb_lookup(desc[j]);
632665
}
666+
intel_tlb_flushall_shift_set(c);
633667
}
634668

635669
static const struct cpu_dev __cpuinitconst intel_cpu_dev = {

arch/x86/mm/tlb.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,8 +316,6 @@ void flush_tlb_mm(struct mm_struct *mm)
316316
preempt_enable();
317317
}
318318

319-
#define FLUSHALL_BAR 16
320-
321319
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
322320
static inline unsigned long has_large_page(struct mm_struct *mm,
323321
unsigned long start, unsigned long end)
@@ -352,7 +350,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
352350
{
353351
struct mm_struct *mm;
354352

355-
if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) {
353+
if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) {
356354
flush_all:
357355
flush_tlb_mm(vma->vm_mm);
358356
return;
@@ -373,7 +371,8 @@ void flush_tlb_range(struct vm_area_struct *vma,
373371
act_entries = tlb_entries > mm->total_vm ?
374372
mm->total_vm : tlb_entries;
375373

376-
if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR)
374+
if ((end - start) >> PAGE_SHIFT >
375+
act_entries >> tlb_flushall_shift)
377376
local_flush_tlb();
378377
else {
379378
if (has_large_page(mm, start, end)) {

include/asm-generic/tlb.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ static inline int tlb_fast_mode(struct mmu_gather *tlb)
113113

114114
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
115115
void tlb_flush_mmu(struct mmu_gather *tlb);
116-
void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
116+
void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
117+
unsigned long end);
117118
int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
118119

119120
/* tlb_remove_page

0 commit comments

Comments
 (0)