Skip to content

Commit 2f4b829

Browse files
ctmarinas authored and wildea01 committed
arm64: Add support for hardware updates of the access and dirty pte bits
The ARMv8.1 architecture extensions introduce support for hardware updates of the access and dirty information in page table entries. With TCR_EL1.HA enabled, when the CPU accesses an address with the PTE_AF bit cleared in the page table, instead of raising an access flag fault the CPU sets the actual page table entry bit. To ensure that kernel modifications to the page tables do not inadvertently revert a change introduced by hardware updates, the exclusive monitor (ldxr/stxr) is adopted in the pte accessors. When TCR_EL1.HD is enabled, a write access to a memory location with the DBM (Dirty Bit Management) bit set in the corresponding pte automatically clears the read-only bit (AP[2]). Such DBM bit maps onto the Linux PTE_WRITE bit and to check whether a writable (DBM set) page is dirty, the kernel tests the PTE_RDONLY bit. In order to allow read-only and dirty pages, the kernel needs to preserve the software dirty bit. The hardware dirty status is transferred to the software dirty bit in ptep_set_wrprotect() (using load/store exclusive loop) and pte_modify(). Signed-off-by: Catalin Marinas <[email protected]> Signed-off-by: Will Deacon <[email protected]>
1 parent b08d464 commit 2f4b829

File tree

4 files changed

+178
-2
lines changed

4 files changed

+178
-2
lines changed

arch/arm64/Kconfig

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,23 @@ config ARM64_VA_BITS
469469
default 42 if ARM64_VA_BITS_42
470470
default 48 if ARM64_VA_BITS_48
471471

472+
config ARM64_HW_AFDBM
473+
bool "Support for hardware updates of the Access and Dirty page flags"
474+
default y
475+
help
476+
The ARMv8.1 architecture extensions introduce support for
477+
hardware updates of the access and dirty information in page
478+
table entries. When enabled in TCR_EL1 (HA and HD bits) on
479+
capable processors, accesses to pages with PTE_AF cleared will
480+
set this bit instead of raising an access flag fault.
481+
Similarly, writes to read-only pages with the DBM bit set will
482+
clear the read-only bit (AP[2]) instead of raising a
483+
permission fault.
484+
485+
Kernels built with this configuration option enabled continue
486+
to work on pre-ARMv8.1 hardware and the performance impact is
487+
minimal. If unsure, say Y.
488+
472489
config CPU_BIG_ENDIAN
473490
bool "Build big-endian kernel"
474491
help

arch/arm64/include/asm/pgtable-hwdef.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
105105
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
106106
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
107+
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
107108
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
108109
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
109110

@@ -168,5 +169,7 @@
168169
#define TCR_TG1_64K (UL(3) << 30)
169170
#define TCR_ASID16 (UL(1) << 36)
170171
#define TCR_TBI0 (UL(1) << 37)
172+
#define TCR_HA (UL(1) << 39)
173+
#define TCR_HD (UL(1) << 40)
171174

172175
#endif

arch/arm64/include/asm/pgtable.h

Lines changed: 145 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#ifndef __ASM_PGTABLE_H
1717
#define __ASM_PGTABLE_H
1818

19+
#include <asm/bug.h>
1920
#include <asm/proc-fns.h>
2021

2122
#include <asm/memory.h>
@@ -27,7 +28,11 @@
2728
#define PTE_VALID (_AT(pteval_t, 1) << 0)
2829
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
2930
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
31+
#ifdef CONFIG_ARM64_HW_AFDBM
32+
#define PTE_WRITE (PTE_DBM) /* same as DBM */
33+
#else
3034
#define PTE_WRITE (_AT(pteval_t, 1) << 57)
35+
#endif
3136
#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
3237

3338
/*
@@ -48,6 +53,9 @@
4853
#define FIRST_USER_ADDRESS 0UL
4954

5055
#ifndef __ASSEMBLY__
56+
57+
#include <linux/mmdebug.h>
58+
5159
extern void __pte_error(const char *file, int line, unsigned long val);
5260
extern void __pmd_error(const char *file, int line, unsigned long val);
5361
extern void __pud_error(const char *file, int line, unsigned long val);
@@ -137,12 +145,20 @@ extern struct page *empty_zero_page;
137145
* The following only work if pte_present(). Undefined behaviour otherwise.
138146
*/
139147
#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)))
140-
#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
141148
#define pte_young(pte) (!!(pte_val(pte) & PTE_AF))
142149
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
143150
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
144151
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
145152

153+
#ifdef CONFIG_ARM64_HW_AFDBM
154+
#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY))
155+
#else
156+
#define pte_hw_dirty(pte) (0)
157+
#endif
158+
#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY))
159+
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
160+
161+
/* True iff the PTE_VALID bit is set; must be a bitwise test, since a non-zero pte (e.g. swap or PROT_NONE entry) is not necessarily valid. */
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
146162
#define pte_valid_user(pte) \
147163
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
148164
#define pte_valid_not_user(pte) \
@@ -209,20 +225,49 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
209225
}
210226
}
211227

228+
struct mm_struct;
229+
struct vm_area_struct;
230+
212231
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
213232

233+
/*
234+
* PTE bits configuration in the presence of hardware Dirty Bit Management
235+
* (PTE_WRITE == PTE_DBM):
236+
*
237+
* Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw)
238+
* 0 0 | 1 0 0
239+
* 0 1 | 1 1 0
240+
* 1 0 | 1 0 1
241+
* 1 1 | 0 1 x
242+
*
243+
* When hardware DBM is not present, the software PTE_DIRTY bit is updated via
244+
* the page fault mechanism. Checking the dirty status of a pte becomes:
245+
*
246+
* PTE_DIRTY || !PTE_RDONLY
247+
*/
214248
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
215249
pte_t *ptep, pte_t pte)
216250
{
217251
if (pte_valid_user(pte)) {
218252
if (!pte_special(pte) && pte_exec(pte))
219253
__sync_icache_dcache(pte, addr);
220-
if (pte_dirty(pte) && pte_write(pte))
254+
if (pte_sw_dirty(pte) && pte_write(pte))
221255
pte_val(pte) &= ~PTE_RDONLY;
222256
else
223257
pte_val(pte) |= PTE_RDONLY;
224258
}
225259

260+
/*
261+
* If the existing pte is valid, check for potential race with
262+
* hardware updates of the pte (ptep_set_access_flags safely changes
263+
* valid ptes without going through an invalid entry).
264+
*/
265+
if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
266+
pte_valid(*ptep)) {
267+
BUG_ON(!pte_young(pte));
268+
BUG_ON(pte_write(*ptep) && !pte_dirty(pte));
269+
}
270+
226271
set_pte(ptep, pte);
227272
}
228273

@@ -461,6 +506,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
461506
{
462507
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
463508
PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
509+
/* preserve the hardware dirty information */
510+
if (pte_hw_dirty(pte))
511+
newprot |= PTE_DIRTY;
464512
pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
465513
return pte;
466514
}
@@ -470,6 +518,101 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
470518
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
471519
}
472520

521+
#ifdef CONFIG_ARM64_HW_AFDBM
522+
/*
523+
* Atomic pte/pmd modifications.
524+
*/
525+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
526+
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
527+
unsigned long address,
528+
pte_t *ptep)
529+
{
530+
pteval_t pteval;
531+
unsigned int tmp, res;
532+
533+
asm volatile("// ptep_test_and_clear_young\n"
534+
" prfm pstl1strm, %2\n"
535+
"1: ldxr %0, %2\n"
536+
" ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
537+
" and %0, %0, %4 // clear PTE_AF\n"
538+
" stxr %w1, %0, %2\n"
539+
" cbnz %w1, 1b\n"
540+
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res)
541+
: "L" (~PTE_AF), "I" (ilog2(PTE_AF)));
542+
543+
return res;
544+
}
545+
546+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
547+
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
548+
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
549+
unsigned long address,
550+
pmd_t *pmdp)
551+
{
552+
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
553+
}
554+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
555+
556+
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
557+
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
558+
unsigned long address, pte_t *ptep)
559+
{
560+
pteval_t old_pteval;
561+
unsigned int tmp;
562+
563+
asm volatile("// ptep_get_and_clear\n"
564+
" prfm pstl1strm, %2\n"
565+
"1: ldxr %0, %2\n"
566+
" stxr %w1, xzr, %2\n"
567+
" cbnz %w1, 1b\n"
568+
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)));
569+
570+
return __pte(old_pteval);
571+
}
572+
573+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
574+
#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
575+
static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
576+
unsigned long address, pmd_t *pmdp)
577+
{
578+
return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
579+
}
580+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
581+
582+
/*
583+
* ptep_set_wrprotect - mark read-only while transferring potential hardware
584+
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
585+
*/
586+
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
587+
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
588+
{
589+
pteval_t pteval;
590+
unsigned long tmp;
591+
592+
asm volatile("// ptep_set_wrprotect\n"
593+
" prfm pstl1strm, %2\n"
594+
"1: ldxr %0, %2\n"
595+
" tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n"
596+
" csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n"
597+
" orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n"
598+
" and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n"
599+
" stxr %w1, %0, %2\n"
600+
" cbnz %w1, 1b\n"
601+
: "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
602+
: "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE)
603+
: "cc");
604+
}
605+
606+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
607+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
608+
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
609+
unsigned long address, pmd_t *pmdp)
610+
{
611+
ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
612+
}
613+
#endif
614+
#endif /* CONFIG_ARM64_HW_AFDBM */
615+
473616
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
474617
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
475618

arch/arm64/mm/proc.S

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,19 @@ ENTRY(__cpu_setup)
196196
*/
197197
mrs x9, ID_AA64MMFR0_EL1
198198
bfi x10, x9, #32, #3
199+
#ifdef CONFIG_ARM64_HW_AFDBM
200+
/*
201+
* Hardware update of the Access and Dirty bits.
202+
*/
203+
mrs x9, ID_AA64MMFR1_EL1
204+
and x9, x9, #0xf
205+
cbz x9, 2f
206+
cmp x9, #2
207+
b.lt 1f
208+
orr x10, x10, #TCR_HD // hardware Dirty flag update
209+
1: orr x10, x10, #TCR_HA // hardware Access flag update
210+
2:
211+
#endif /* CONFIG_ARM64_HW_AFDBM */
199212
msr tcr_el1, x10
200213
ret // return to head.S
201214
ENDPROC(__cpu_setup)

0 commit comments

Comments
 (0)