
Commit e00b12e

Peter Zijlstra authored and Ingo Molnar committed
perf/x86: Further optimize copy_from_user_nmi()
Now that we can deal with nested NMI due to IRET re-enabling NMIs and can deal with faults from NMI by making sure we preserve CR2 over NMIs, we can in fact simply access user-space memory from NMI context.

So rewrite copy_from_user_nmi() to use __copy_from_user_inatomic() and rework the fault path to do the minimal required work before taking the in_atomic() fault handler.

In particular avoid perf_sw_event(), which would make perf recurse on itself (it should be harmless as our recursion protections should be able to deal with this -- but why tempt fate).

Also rename notify_page_fault() to kprobes_fault(), as that is a much better name; there is no notifier in it and it's specific to kprobes.

Don measured that his worst case NMI path shrunk from ~300K cycles to ~150K cycles.

Cc: Stephane Eranian <[email protected]>
Cc: [email protected]
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: [email protected]
Tested-by: Don Zickus <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 2c42cfb commit e00b12e

File tree

2 files changed: 36 additions & 48 deletions


arch/x86/lib/usercopy.c

Lines changed: 15 additions & 28 deletions
@@ -11,39 +11,26 @@
 #include <linux/sched.h>
 
 /*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
  */
 unsigned long
 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 {
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
+	unsigned long ret;
 
 	if (__range_not_ok(from, n, TASK_SIZE))
-		return len;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
+		return 0;
+
+	/*
+	 * Even though this function is typically called from NMI/IRQ context
+	 * disable pagefaults so that its behaviour is consistent even when
+	 * called form other contexts.
+	 */
+	pagefault_disable();
+	ret = __copy_from_user_inatomic(to, from, n);
+	pagefault_enable();
+
+	return n - ret;
 }
 EXPORT_SYMBOL_GPL(copy_from_user_nmi);
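
For callers the important property is unchanged: the function returns the number of bytes actually copied, and a short return means the copy hit a fault or an out-of-range address. Below is a minimal sketch of that usage pattern, modeled on the perf user-callchain walker (perf_callchain_user()); walk_user_stack() is a hypothetical helper, not kernel code, and the struct mirrors the kernel's struct stack_frame from <asm/stacktrace.h>.

/* Hypothetical example: frame-pointer walk of a user stack from NMI context. */
struct stack_frame {
	const void __user *next_frame;	/* caller's frame pointer */
	unsigned long return_address;	/* caller's return address */
};

static void walk_user_stack(const void __user *fp, unsigned int max_depth)
{
	unsigned int depth;

	for (depth = 0; depth < max_depth; depth++) {
		struct stack_frame frame;
		unsigned long bytes;

		/* Short copy => fault or address beyond TASK_SIZE: stop walking. */
		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		/* record frame.return_address here, then follow the frame chain */
		fp = frame.next_frame;
	}
}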

arch/x86/mm/fault.c

Lines changed: 21 additions & 20 deletions
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
 	return 0;
 }
 
-static inline int __kprobes notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes kprobes_fault(struct pt_regs *regs)
 {
 	int ret = 0;
 
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (notify_page_fault(regs))
+	if (kprobes_fault(regs))
 		return;
 	/*
 	 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	}
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(notify_page_fault(regs)))
+	if (unlikely(kprobes_fault(regs)))
 		return;
-	/*
-	 * It's safe to allow irq's after cr2 has been saved and the
-	 * vmalloc fault has been handled.
-	 *
-	 * User-mode registers count as a user access even for any
-	 * potential system fault or CPU buglet:
-	 */
-	if (user_mode_vm(regs)) {
-		local_irq_enable();
-		error_code |= PF_USER;
-		flags |= FAULT_FLAG_USER;
-	} else {
-		if (regs->flags & X86_EFLAGS_IF)
-			local_irq_enable();
-	}
 
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		}
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		return;
 	}
 
+	/*
+	 * It's safe to allow irq's after cr2 has been saved and the
+	 * vmalloc fault has been handled.
+	 *
+	 * User-mode registers count as a user access even for any
+	 * potential system fault or CPU buglet:
+	 */
+	if (user_mode_vm(regs)) {
+		local_irq_enable();
+		error_code |= PF_USER;
+		flags |= FAULT_FLAG_USER;
+	} else {
+		if (regs->flags & X86_EFLAGS_IF)
+			local_irq_enable();
+	}
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	if (error_code & PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
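
Apart from the notify_page_fault() -> kprobes_fault() rename, the fault.c hunks are a pure reordering: the in_atomic() bail-out now runs before interrupts are enabled and before the PERF_COUNT_SW_PAGE_FAULTS event is counted, so a fault taken from the NMI copy path does minimal work and perf cannot recurse into itself. A condensed sketch of the resulting __do_page_fault() ordering follows; sketch_do_page_fault() is not a real kernel symbol and the surrounding checks are only paraphrased in comments.

static void sketch_do_page_fault(struct pt_regs *regs, unsigned long error_code,
				 unsigned long address)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	/* vmalloc faults, kmmio_fault(), kprobes_fault(), spurious faults ... */

	/*
	 * Bail out early if the fault came from atomic context or there is
	 * no user mm -- this is the path a faulting NMI-time
	 * __copy_from_user_inatomic() takes, so it must stay cheap:
	 */
	if (unlikely(in_atomic() || !current->mm)) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

	/* Only now enable IRQs and mark the fault as a user access: */
	if (user_mode_vm(regs)) {
		local_irq_enable();
		error_code |= PF_USER;
		flags |= FAULT_FLAG_USER;
	} else if (regs->flags & X86_EFLAGS_IF) {
		local_irq_enable();
	}

	/* ... and only now account the software event, so perf cannot recurse: */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/* down_read(&mm->mmap_sem), handle_mm_fault(), retry handling, ... */
}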
