Skip to content

Commit 4facb95

Browse files
committed
x86/entry: Unbreak 32bit fast syscall
Andy reported that the syscall tracing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The reason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent d5c678a commit 4facb95

File tree

3 files changed

+91
-24
lines changed

3 files changed

+91
-24
lines changed

arch/x86/entry/common.c

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,10 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
6060
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
6161
static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
6262
{
63-
unsigned int nr = (unsigned int)regs->orig_ax;
64-
6563
if (IS_ENABLED(CONFIG_IA32_EMULATION))
6664
current_thread_info()->status |= TS_COMPAT;
67-
/*
68-
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
69-
* orig_ax, the unsigned int return value truncates it. This may
70-
* or may not be necessary, but it matches the old asm behavior.
71-
*/
72-
return (unsigned int)syscall_enter_from_user_mode(regs, nr);
65+
66+
return (unsigned int)regs->orig_ax;
7367
}
7468

7569
/*
@@ -91,15 +85,29 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
9185
{
9286
unsigned int nr = syscall_32_enter(regs);
9387

88+
/*
89+
* Subtlety here: if ptrace pokes something larger than 2^32-1 into
90+
* orig_ax, the unsigned int return value truncates it. This may
91+
* or may not be necessary, but it matches the old asm behavior.
92+
*/
93+
nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
94+
9495
do_syscall_32_irqs_on(regs, nr);
9596
syscall_exit_to_user_mode(regs);
9697
}
9798

9899
static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
99100
{
100-
unsigned int nr = syscall_32_enter(regs);
101+
unsigned int nr = syscall_32_enter(regs);
101102
int res;
102103

104+
/*
105+
* This cannot use syscall_enter_from_user_mode() as it has to
106+
* fetch EBP before invoking any of the syscall entry work
107+
* functions.
108+
*/
109+
syscall_enter_from_user_mode_prepare(regs);
110+
103111
instrumentation_begin();
104112
/* Fetch EBP from where the vDSO stashed it. */
105113
if (IS_ENABLED(CONFIG_X86_64)) {
@@ -122,6 +130,9 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
122130
return false;
123131
}
124132

133+
/* The cast truncates any ptrace induced syscall nr > 2^32 - 1 */
134+
nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
135+
125136
/* Now this is just like a normal syscall. */
126137
do_syscall_32_irqs_on(regs, nr);
127138
syscall_exit_to_user_mode(regs);

include/linux/entry-common.h

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
110110
#endif
111111

112112
/**
113-
* syscall_enter_from_user_mode - Check and handle work before invoking
114-
* a syscall
113+
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
115114
* @regs: Pointer to currents pt_regs
116-
* @syscall: The syscall number
117115
*
118116
* Invoked from architecture specific syscall entry code with interrupts
119117
* disabled. The calling code has to be non-instrumentable. When the
120-
* function returns all state is correct and the subsequent functions can be
121-
* instrumented.
118+
* function returns all state is correct, interrupts are enabled and the
119+
* subsequent functions can be instrumented.
120+
*
121+
* This handles lockdep, RCU (context tracking) and tracing state.
122+
*
123+
* This is invoked when there is extra architecture specific functionality
124+
* to be done between establishing state and handling user mode entry work.
125+
*/
126+
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
127+
128+
/**
129+
* syscall_enter_from_user_mode_work - Check and handle work before invoking
130+
* a syscall
131+
* @regs: Pointer to current's pt_regs
132+
* @syscall: The syscall number
133+
*
134+
* Invoked from architecture specific syscall entry code with interrupts
135+
* enabled after invoking syscall_enter_from_user_mode_prepare() and extra
136+
* architecture specific work.
122137
*
123138
* Returns: The original or a modified syscall number
124139
*
@@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
127142
* syscall_set_return_value() first. If neither of those are called and -1
128143
* is returned, then the syscall will fail with ENOSYS.
129144
*
130-
* The following functionality is handled here:
145+
* It handles the following work items:
131146
*
132-
* 1) Establish state (lockdep, RCU (context tracking), tracing)
133-
* 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
147+
* 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
134148
* __secure_computing(), trace_sys_enter()
135-
* 3) Invocation of audit_syscall_entry()
149+
* 2) Invocation of audit_syscall_entry()
150+
*/
151+
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
152+
153+
/**
154+
* syscall_enter_from_user_mode - Establish state and check and handle work
155+
* before invoking a syscall
156+
* @regs: Pointer to current's pt_regs
157+
* @syscall: The syscall number
158+
*
159+
* Invoked from architecture specific syscall entry code with interrupts
160+
* disabled. The calling code has to be non-instrumentable. When the
161+
* function returns all state is correct, interrupts are enabled and the
162+
* subsequent functions can be instrumented.
163+
*
164+
* This is a combination of syscall_enter_from_user_mode_prepare() and
165+
* syscall_enter_from_user_mode_work().
166+
*
167+
* Returns: The original or a modified syscall number. See
168+
* syscall_enter_from_user_mode_work() for further explanation.
136169
*/
137170
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
138171

kernel/entry/common.c

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,22 +69,45 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
6969
return ret ? : syscall_get_nr(current, regs);
7070
}
7171

72-
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
72+
static __always_inline long
73+
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
7374
{
7475
unsigned long ti_work;
7576

76-
enter_from_user_mode(regs);
77-
instrumentation_begin();
78-
79-
local_irq_enable();
8077
ti_work = READ_ONCE(current_thread_info()->flags);
8178
if (ti_work & SYSCALL_ENTER_WORK)
8279
syscall = syscall_trace_enter(regs, syscall, ti_work);
83-
instrumentation_end();
8480

8581
return syscall;
8682
}
8783

84+
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
85+
{
86+
return __syscall_enter_from_user_work(regs, syscall);
87+
}
88+
89+
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
90+
{
91+
long ret;
92+
93+
enter_from_user_mode(regs);
94+
95+
instrumentation_begin();
96+
local_irq_enable();
97+
ret = __syscall_enter_from_user_work(regs, syscall);
98+
instrumentation_end();
99+
100+
return ret;
101+
}
102+
103+
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
104+
{
105+
enter_from_user_mode(regs);
106+
instrumentation_begin();
107+
local_irq_enable();
108+
instrumentation_end();
109+
}
110+
88111
/**
89112
* exit_to_user_mode - Fixup state when exiting to user mode
90113
*

0 commit comments

Comments
 (0)