
Commit 85bcfaf

npiggin authored and mpe committed
powerpc/64s/radix: optimise pte_update
Implementing pte_update with pte_xchg (which uses cmpxchg) is inefficient. A single larx/stcx. works fine, no need for the less efficient cmpxchg sequence.

Then remove the memory barriers from the operation. There is a requirement for TLB flushing to load mm_cpumask after the store that reduces pte permissions, which is moved into the TLB flush code.

Signed-off-by: Nicholas Piggin <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
1 parent f1cb8f9 commit 85bcfaf

3 files changed: 27 additions, 15 deletions


arch/powerpc/include/asm/book3s/64/radix.h

Lines changed: 13 additions & 12 deletions
@@ -131,20 +131,21 @@ extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
-	pte_t pte;
-	unsigned long old_pte, new_pte;
-
-	do {
-		pte = READ_ONCE(*ptep);
-		old_pte = pte_val(pte);
-		new_pte = (old_pte | set) & ~clr;
-
-	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
-	return old_pte;
+	__be64 old_be, tmp_be;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n"
+	"	andc	%1,%0,%5	\n"
+	"	or	%1,%1,%4	\n"
+	"	stdcx.	%1,0,%3	\n"
+	"	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+	: "cc" );
+
+	return be64_to_cpu(old_be);
 }
 
-
 static inline unsigned long radix__pte_update(struct mm_struct *mm,
 					      unsigned long addr,
 					      pte_t *ptep, unsigned long clr,
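
For reference, a minimal C11 sketch of what the new __radix_pte_update() computes. The function name pte_update_sketch() is invented, and the big-endian PTE representation (the cpu_to_be64()/be64_to_cpu() conversions) is omitted for simplicity. The relaxed memory order mirrors the fact that the new asm carries no barriers; only the real ldarx/stdcx. pair gives the single larx/stcx. sequence on hardware, so this is illustrative rather than a drop-in implementation:

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative only: atomically clear 'clr' bits and set 'set' bits,
 * returning the old value, with no ordering guarantees. */
static inline uint64_t pte_update_sketch(_Atomic uint64_t *ptep,
					 uint64_t clr, uint64_t set)
{
	uint64_t old = atomic_load_explicit(ptep, memory_order_relaxed);

	/* Retry until no other CPU raced with the read-modify-write;
	 * 'old' is refreshed automatically on each failure. */
	while (!atomic_compare_exchange_weak_explicit(ptep, &old,
						      (old & ~clr) | set,
						      memory_order_relaxed,
						      memory_order_relaxed))
		;

	return old;
}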

arch/powerpc/mm/mmu_context.c

Lines changed: 4 additions & 2 deletions
@@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * in switch_slb(), and/or the store of paca->mm_ctx_id in
 	 * copy_mm_to_paca().
 	 *
-	 * On the read side the barrier is in pte_xchg(), which orders
-	 * the store to the PTE vs the load of mm_cpumask.
+	 * On the other side, the barrier is in mm/tlb-radix.c for
+	 * radix which orders earlier stores to clear the PTEs vs
+	 * the load of mm_cpumask. And pte_xchg which does the same
+	 * thing for hash.
 	 *
 	 * This full barrier is needed by membarrier when switching
 	 * between processes after store to rq->curr, before user-space
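
The pairing this comment describes can be sketched as a two-CPU litmus test. This is illustrative only: the variable names are borrowed from the diff, and the seq_cst fences stand in for smp_mb():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic bool cpu_in_mm_cpumask;	/* stands in for this CPU's mm_cpumask bit */
static _Atomic uint64_t pte;		/* stands in for a PTE */

/* cf. switch_mm_irqs_off(): publish the CPU, then use translations */
void switch_side(void)
{
	atomic_store_explicit(&cpu_in_mm_cpumask, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* the barrier in switch_mm_irqs_off */
	(void)atomic_load_explicit(&pte, memory_order_relaxed);	/* user access */
}

/* cf. radix__flush_tlb_mm(): downgrade the PTE, then find its users */
void flush_side(void)
{
	atomic_store_explicit(&pte, 0, memory_order_relaxed);	/* clear/downgrade PTE */
	atomic_thread_fence(memory_order_seq_cst);		/* the new smp_mb() */
	if (atomic_load_explicit(&cpu_in_mm_cpumask, memory_order_relaxed)) {
		/* CPU is visible in mm_cpumask: send it the invalidation */
	}
}

At least one of the two loads must observe the other CPU's store: either the flusher sees the CPU in mm_cpumask and invalidates its TLB, or the switching CPU's subsequent accesses already see the downgraded PTE.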

arch/powerpc/mm/tlb-radix.c

Lines changed: 10 additions & 1 deletion
@@ -524,6 +524,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
@@ -544,6 +549,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 	else
@@ -568,6 +574,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	else
@@ -630,6 +637,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -791,6 +799,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -849,7 +858,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
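
A hypothetical caller pattern (not part of this commit) showing where the ordering now comes from. wrprotect_and_flush() is an invented name; radix__pte_update(), radix__flush_tlb_mm(), and _PAGE_WRITE are real identifiers from this tree:

/* Hypothetical sketch: downgrade write permission, then flush. */
static void wrprotect_and_flush(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep)
{
	/* Clears _PAGE_WRITE; after this commit the update itself
	 * carries no memory barrier. */
	radix__pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);

	/* The smp_mb() added above runs before mm_cpumask is read, so
	 * any CPU that may hold the stale translation is still seen. */
	radix__flush_tlb_mm(mm);
}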
