Skip to content

Commit c89ca8a

Browse files
author
Scott Wood
committed
powerpc/e6500: Optimize hugepage TLB misses
Some workloads take a lot of TLB misses despite using traditional hugepages. Handle these TLB misses in the asm fastpath rather than going through a bunch of C code. With this patch I measured around a 5x speedup in handling hugepage TLB misses. Signed-off-by: Scott Wood <[email protected]>
1 parent fb326e9 commit c89ca8a

File tree

1 file changed

+48
-3
lines changed

1 file changed

+48
-3
lines changed

arch/powerpc/mm/tlb_low_64e.S

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
398398
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
399399
clrrdi r15,r15,3
400400
cmpdi cr0,r14,0
401-
bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */
401+
bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */
402402
ldx r14,r14,r15 /* grab pud entry */
403403

404404
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
405405
clrrdi r15,r15,3
406406
cmpdi cr0,r14,0
407-
bge tlb_miss_fault_e6500
407+
bge tlb_miss_huge_e6500
408408
ldx r14,r14,r15 /* Grab pmd entry */
409409

410410
mfspr r10,SPRN_MAS0
411411
cmpdi cr0,r14,0
412-
bge tlb_miss_fault_e6500
412+
bge tlb_miss_huge_e6500
413413

414414
/* Now we build the MAS for a 2M indirect page:
415415
*
@@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
428428
clrrdi r15,r16,21 /* make EA 2M-aligned */
429429
mtspr SPRN_MAS2,r15
430430

431+
tlb_miss_huge_done_e6500:
431432
lbz r15,TCD_ESEL_NEXT(r11)
432433
lbz r16,TCD_ESEL_MAX(r11)
433434
lbz r14,TCD_ESEL_FIRST(r11)
@@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
456457
tlb_epilog_bolted
457458
rfi
458459

460+
tlb_miss_huge_e6500: /* Hugepage TLB miss fast path: builds MAS1, MAS2 and MAS7_MAS3 for a huge page, then joins the shared code at tlb_miss_huge_done_e6500. Reached by "bge" after "cmpdi cr0,r14,0", so cr0 still holds r14 compared with 0 and r14 is not negative. */
461+
beq tlb_miss_fault_e6500 /* r14 == 0: empty entry, take the fault path */
462+
li r10,1 /* the single bit that rldimi inserts at the top of r14 below */
463+
andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
464+
rldimi r14,r10,63,0 /* Set PD_HUGE */
465+
xor r14,r14,r15 /* Clear size bits */
466+
ldx r14,0,r14 /* load the doubleword at the resulting address; r14 is used as the PTE from here on */
467+
468+
/*
469+
* Now we build the MAS for a huge page.
470+
*
471+
* MAS 0 : ESEL needs to be filled by software round-robin
472+
* - can be handled by indirect code
473+
* MAS 1 : Need to clear IND and set TSIZE
474+
* MAS 2,3+7: Needs to be redone similar to non-tablewalk handler
475+
*/
476+
477+
subi r15,r15,10 /* Convert psize to tsize */
478+
mfspr r10,SPRN_MAS1 /* read-modify-write MAS1 */
479+
rlwinm r10,r10,0,~MAS1_IND /* clear the IND bit */
480+
rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK /* insert tsize into the TSIZE field */
481+
mtspr SPRN_MAS1,r10
482+
483+
li r10,-0x400 /* all ones with the low 10 bits clear */
484+
sld r15,r10,r15 /* Generate mask based on size */
485+
and r10,r16,r15 /* r10 = r16 rounded down to the page size; r16 is presumably still the faulting EA - confirm against the handler prolog */
486+
rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT /* start the MAS7_MAS3 value from the PTE; presumably positions the RPN field - verify against the pte format */
487+
rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
488+
clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */
489+
rlwimi r15,r14,32-8,22,25 /* Move in U bits */
490+
mtspr SPRN_MAS2,r10 /* r10 is free for reuse as scratch after this write */
491+
andi. r10,r14,_PAGE_DIRTY /* sets cr0 for the bne below */
492+
rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
493+
494+
/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
495+
bne 1f /* dirty bit set: keep SW/UW as taken from the PTE */
496+
li r10,MAS3_SW|MAS3_UW
497+
andc r15,r15,r10 /* r15 &= ~(MAS3_SW|MAS3_UW) */
498+
1:
499+
mtspr SPRN_MAS7_MAS3,r15
500+
501+
mfspr r10,SPRN_MAS0 /* r10 was clobbered as scratch above; reload MAS0, presumably what the shared code expects in r10 - confirm */
502+
b tlb_miss_huge_done_e6500 /* join the common path, which begins by loading the TCD_ESEL_* fields */
503+
459504
tlb_miss_kernel_e6500:
460505
ld r14,PACA_KERNELPGD(r13)
461506
cmpldi cr1,r15,8 /* Check for vmalloc region */

0 commit comments

Comments
 (0)