Skip to content

Commit 7d06d9c

Browse files
hansendc authored and KAGA-KOKO committed
mm: Implement new pkey_mprotect() system call
pkey_mprotect() is just like mprotect, except it also takes a protection key as an argument. On systems that do not support protection keys, it still works, but requires that key=0. Otherwise it does exactly what mprotect does.

I expect it to get used like this, if you want to guarantee that any mapping you create can *never* be accessed without the right protection keys set up.

    int real_prot = PROT_READ|PROT_WRITE;
    pkey = pkey_alloc(0, PKEY_DENY_ACCESS);
    ptr = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    ret = pkey_mprotect(ptr, PAGE_SIZE, real_prot, pkey);

This way, there is *no* window where the mapping is accessible since it was always either PROT_NONE or had a protection key set that denied all access.

We settled on 'unsigned long' for the type of the key here. We only need 4 bits on x86 today, but I figured that other architectures might need some more space.

Semantically, we have a bit of a problem if we combine this syscall with our previously-introduced execute-only support: What do we do when we mix execute-only pkey use with pkey_mprotect() use? For instance:

    pkey_mprotect(ptr, PAGE_SIZE, PROT_WRITE, 6); // set pkey=6
    mprotect(ptr, PAGE_SIZE, PROT_EXEC);          // set pkey=X_ONLY_PKEY?
    mprotect(ptr, PAGE_SIZE, PROT_WRITE);         // is pkey=6 again?

To solve that, we make the plain-mprotect()-initiated execute-only support only apply to VMAs that have the default protection key (0) set on them.

Proposed semantics:

1. protection key 0 is special and represents the default, "unassigned" protection key. It is always allocated.
2. mprotect() never affects a mapping's pkey_mprotect()-assigned protection key. A protection key of 0 (even if set explicitly) represents an unassigned protection key.
   2a. mprotect(PROT_EXEC) on a mapping with an assigned protection key may or may not result in a mapping with execute-only properties. pkey_mprotect() plus pkey_set() on all threads should be used to _guarantee_ execute-only semantics if this is not a strong enough semantic.
3. mprotect(PROT_EXEC) may result in an "execute-only" mapping. The kernel will internally attempt to allocate and dedicate a protection key for the purpose of execute-only mappings. This may not be possible in cases where there are no free protection keys available. It can also happen, of course, in situations where there is no hardware support for protection keys.

Signed-off-by: Dave Hansen <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Cc: [email protected]
Cc: Dave Hansen <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
1 parent e8c6226 commit 7d06d9c

File tree

4 files changed

+45
-23
lines changed

4 files changed

+45
-23
lines changed

arch/x86/include/asm/mmu_context.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <asm/desc.h>
55
#include <linux/atomic.h>
66
#include <linux/mm_types.h>
7+
#include <linux/pkeys.h>
78

89
#include <trace/events/tlb.h>
910

@@ -195,16 +196,20 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
195196
mpx_notify_unmap(mm, vma, start, end);
196197
}
197198

199+
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
198200
static inline int vma_pkey(struct vm_area_struct *vma)
199201
{
200-
u16 pkey = 0;
201-
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
202202
unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
203203
VM_PKEY_BIT2 | VM_PKEY_BIT3;
204-
pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
205-
#endif
206-
return pkey;
204+
205+
return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
206+
}
207+
#else
208+
static inline int vma_pkey(struct vm_area_struct *vma)
209+
{
210+
return 0;
207211
}
212+
#endif
208213

209214
static inline bool __pkru_allows_pkey(u16 pkey, bool write)
210215
{

arch/x86/include/asm/pkeys.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#ifndef _ASM_X86_PKEYS_H
22
#define _ASM_X86_PKEYS_H
33

4-
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
4+
#define PKEY_DEDICATED_EXECUTE_ONLY 15
5+
/*
6+
* Consider the PKEY_DEDICATED_EXECUTE_ONLY key unavailable.
7+
*/
8+
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? \
9+
PKEY_DEDICATED_EXECUTE_ONLY : 1)
510

611
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
712
unsigned long init_val);
@@ -10,7 +15,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
1015
* Try to dedicate one of the protection keys to be used as an
1116
* execute-only protection key.
1217
*/
13-
#define PKEY_DEDICATED_EXECUTE_ONLY 15
1418
extern int __execute_only_pkey(struct mm_struct *mm);
1519
static inline int execute_only_pkey(struct mm_struct *mm)
1620
{
@@ -31,4 +35,7 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
3135
return __arch_override_mprotect_pkey(vma, prot, pkey);
3236
}
3337

38+
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
39+
unsigned long init_val);
40+
3441
#endif /*_ASM_X86_PKEYS_H */

include/linux/pkeys.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,4 @@
1818
#define PKEY_DEDICATED_EXECUTE_ONLY 0
1919
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
2020

21-
/*
22-
* This is called from mprotect_pkey().
23-
*
24-
* Returns true if the protection keys is valid.
25-
*/
26-
static inline bool validate_pkey(int pkey)
27-
{
28-
if (pkey < 0)
29-
return false;
30-
return (pkey < arch_max_pkey());
31-
}
32-
3321
#endif /* _LINUX_PKEYS_H */

mm/mprotect.c

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,15 +352,24 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
352352
return error;
353353
}
354354

355-
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
356-
unsigned long, prot)
355+
/*
356+
* pkey==-1 when doing a legacy mprotect()
357+
*/
358+
static int do_mprotect_pkey(unsigned long start, size_t len,
359+
unsigned long prot, int pkey)
357360
{
358361
unsigned long nstart, end, tmp, reqprot;
359362
struct vm_area_struct *vma, *prev;
360363
int error = -EINVAL;
361364
const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
362365
const bool rier = (current->personality & READ_IMPLIES_EXEC) &&
363366
(prot & PROT_READ);
367+
/*
368+
* A temporary safety check since we are not validating
369+
* the pkey before we introduce the allocation code.
370+
*/
371+
if (pkey != -1)
372+
return -EINVAL;
364373

365374
prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
366375
if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
@@ -409,15 +418,16 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
409418

410419
for (nstart = start ; ; ) {
411420
unsigned long newflags;
412-
int pkey = arch_override_mprotect_pkey(vma, prot, -1);
421+
int new_vma_pkey;
413422

414423
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
415424

416425
/* Does the application expect PROT_READ to imply PROT_EXEC */
417426
if (rier && (vma->vm_flags & VM_MAYEXEC))
418427
prot |= PROT_EXEC;
419428

420-
newflags = calc_vm_prot_bits(prot, pkey);
429+
new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
430+
newflags = calc_vm_prot_bits(prot, new_vma_pkey);
421431
newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
422432

423433
/* newflags >> 4 shift VM_MAY% in place of VM_% */
@@ -454,3 +464,15 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
454464
up_write(&current->mm->mmap_sem);
455465
return error;
456466
}
467+
468+
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
469+
unsigned long, prot)
470+
{
471+
return do_mprotect_pkey(start, len, prot, -1);
472+
}
473+
474+
SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
475+
unsigned long, prot, int, pkey)
476+
{
477+
return do_mprotect_pkey(start, len, prot, pkey);
478+
}

0 commit comments

Comments
 (0)