Skip to content

Commit 1530866

Browse files
Torsten Duwe authored and mpe committed
powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI
The gcc switch -mprofile-kernel defines a new ABI for calling _mcount() very early in the function with minimal overhead. Although -mprofile-kernel has been available since GCC 3.4, there were bugs which were only fixed recently. Currently it is known to work in GCC 4.9, 5 and 6. Additionally there are two possible code sequences generated by the flag, the first uses mflr/std/bl and the second is optimised to omit the std. Currently only gcc 6 has the optimised sequence. This patch supports both sequences. Initial work started by Vojtech Pavlik, used with permission. Key changes: - rework _mcount() to work for both the old and new ABIs. - implement new versions of ftrace_caller() and ftrace_graph_caller() which deal with the new ABI. - updates to __ftrace_make_nop() to recognise the new mcount calling sequence. - updates to __ftrace_make_call() to recognise the nop'ed sequence. - implement ftrace_modify_call(). - updates to the module loader to suppress the toc save in the module stub when calling mcount with the new ABI. Reviewed-by: Balbir Singh <[email protected]> Signed-off-by: Torsten Duwe <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent 9a7841a commit 1530866

File tree

5 files changed

+324
-20
lines changed

5 files changed

+324
-20
lines changed

arch/powerpc/include/asm/code-patching.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func)
9999
#endif
100100
}
101101

102+
#ifdef CONFIG_PPC64
103+
/*
104+
* Some instruction encodings commonly used in dynamic ftracing
105+
* and function live patching.
106+
*/
107+
108+
/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
109+
#if defined(_CALL_ELF) && _CALL_ELF == 2
110+
#define R2_STACK_OFFSET 24
111+
#else
112+
#define R2_STACK_OFFSET 40
113+
#endif
114+
115+
#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \
116+
___PPC_RA(__REG_R1) | R2_STACK_OFFSET)
117+
118+
/* usually preceded by a mflr r0 */
119+
#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \
120+
___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
121+
#endif /* CONFIG_PPC64 */
122+
102123
#endif /* _ASM_POWERPC_CODE_PATCHING_H */

arch/powerpc/include/asm/ftrace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
extern void _mcount(void);
4747

4848
#ifdef CONFIG_DYNAMIC_FTRACE
49+
# define FTRACE_ADDR ((unsigned long)ftrace_caller)
50+
# define FTRACE_REGS_ADDR FTRACE_ADDR
4951
static inline unsigned long ftrace_call_adjust(unsigned long addr)
5052
{
5153
/* relocation of mcount call site is the same as the address */
@@ -58,6 +60,9 @@ struct dyn_arch_ftrace {
5860
#endif /* CONFIG_DYNAMIC_FTRACE */
5961
#endif /* __ASSEMBLY__ */
6062

63+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
64+
#define ARCH_SUPPORTS_FTRACE_OPS 1
65+
#endif
6166
#endif
6267

6368
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)

arch/powerpc/kernel/entry_64.S

Lines changed: 165 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1143,8 +1143,12 @@ _GLOBAL(enter_prom)
11431143
#ifdef CONFIG_DYNAMIC_FTRACE
11441144
_GLOBAL(mcount)
11451145
_GLOBAL(_mcount)
1146-
blr
1146+
mflr r12
1147+
mtctr r12
1148+
mtlr r0
1149+
bctr
11471150

1151+
#ifndef CC_USING_MPROFILE_KERNEL
11481152
_GLOBAL_TOC(ftrace_caller)
11491153
/* Taken from output of objdump from lib64/glibc */
11501154
mflr r3
@@ -1166,6 +1170,115 @@ _GLOBAL(ftrace_graph_stub)
11661170
ld r0, 128(r1)
11671171
mtlr r0
11681172
addi r1, r1, 112
1173+
1174+
#else /* CC_USING_MPROFILE_KERNEL */
1175+
/*
1176+
*
1177+
* ftrace_caller() is the function that replaces _mcount() when ftrace is
1178+
* active.
1179+
*
1180+
* We arrive here after a function A calls function B, and we are the trace
1181+
* function for B. When we enter, r1 points to A's stack frame; B has not
1182+
* had a chance to allocate one yet.
1183+
*
1184+
* Additionally r2 may point either to the TOC for A, or B, depending on
1185+
* whether B did a TOC setup sequence before calling us.
1186+
*
1187+
* On entry the LR points back to the _mcount() call site, and r0 holds the
1188+
* saved LR as it was on entry to B, ie. the original return address at the
1189+
* call site in A.
1190+
*
1191+
* Our job is to save the register state into a struct pt_regs (on the stack)
1192+
* and then arrange for the ftrace function to be called.
1193+
*/
1194+
_GLOBAL(ftrace_caller)
1195+
/* Save the original return address in A's stack frame */
1196+
std r0,LRSAVE(r1)
1197+
1198+
/* Create our stack frame + pt_regs */
1199+
stdu r1,-SWITCH_FRAME_SIZE(r1)
1200+
1201+
/* Save all gprs to pt_regs */
1202+
SAVE_8GPRS(0,r1)
1203+
SAVE_8GPRS(8,r1)
1204+
SAVE_8GPRS(16,r1)
1205+
SAVE_8GPRS(24,r1)
1206+
1207+
/* Load special regs for save below */
1208+
mfmsr r8
1209+
mfctr r9
1210+
mfxer r10
1211+
mfcr r11
1212+
1213+
/* Get the _mcount() call site out of LR */
1214+
mflr r7
1215+
/* Save it as pt_regs->nip & pt_regs->link */
1216+
std r7, _NIP(r1)
1217+
std r7, _LINK(r1)
1218+
1219+
/* Save callee's TOC in the ABI compliant location */
1220+
std r2, 24(r1)
1221+
ld r2,PACATOC(r13) /* get kernel TOC in r2 */
1222+
1223+
addis r3,r2,function_trace_op@toc@ha
1224+
addi r3,r3,function_trace_op@toc@l
1225+
ld r5,0(r3)
1226+
1227+
/* Calculate ip from nip-4 into r3 for call below */
1228+
subi r3, r7, MCOUNT_INSN_SIZE
1229+
1230+
/* Put the original return address in r4 as parent_ip */
1231+
mr r4, r0
1232+
1233+
/* Save special regs */
1234+
std r8, _MSR(r1)
1235+
std r9, _CTR(r1)
1236+
std r10, _XER(r1)
1237+
std r11, _CCR(r1)
1238+
1239+
/* Load &pt_regs in r6 for call below */
1240+
addi r6, r1 ,STACK_FRAME_OVERHEAD
1241+
1242+
/* ftrace_call(r3, r4, r5, r6) */
1243+
.globl ftrace_call
1244+
ftrace_call:
1245+
bl ftrace_stub
1246+
nop
1247+
1248+
/* Load ctr with the possibly modified NIP */
1249+
ld r3, _NIP(r1)
1250+
mtctr r3
1251+
1252+
/* Restore gprs */
1253+
REST_8GPRS(0,r1)
1254+
REST_8GPRS(8,r1)
1255+
REST_8GPRS(16,r1)
1256+
REST_8GPRS(24,r1)
1257+
1258+
/* Restore callee's TOC */
1259+
ld r2, 24(r1)
1260+
1261+
/* Pop our stack frame */
1262+
addi r1, r1, SWITCH_FRAME_SIZE
1263+
1264+
/* Restore original LR for return to B */
1265+
ld r0, LRSAVE(r1)
1266+
mtlr r0
1267+
1268+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1269+
stdu r1, -112(r1)
1270+
.globl ftrace_graph_call
1271+
ftrace_graph_call:
1272+
b ftrace_graph_stub
1273+
_GLOBAL(ftrace_graph_stub)
1274+
addi r1, r1, 112
1275+
#endif
1276+
1277+
ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */
1278+
mtlr r0
1279+
bctr /* jump after _mcount site */
1280+
#endif /* CC_USING_MPROFILE_KERNEL */
1281+
11691282
_GLOBAL(ftrace_stub)
11701283
blr
11711284
#else
@@ -1198,6 +1311,7 @@ _GLOBAL(ftrace_stub)
11981311
#endif /* CONFIG_DYNAMIC_FTRACE */
11991312

12001313
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1314+
#ifndef CC_USING_MPROFILE_KERNEL
12011315
_GLOBAL(ftrace_graph_caller)
12021316
/* load r4 with local address */
12031317
ld r4, 128(r1)
@@ -1222,6 +1336,56 @@ _GLOBAL(ftrace_graph_caller)
12221336
addi r1, r1, 112
12231337
blr
12241338

1339+
#else /* CC_USING_MPROFILE_KERNEL */
1340+
_GLOBAL(ftrace_graph_caller)
1341+
/* with -mprofile-kernel, parameter regs are still alive at _mcount */
1342+
std r10, 104(r1)
1343+
std r9, 96(r1)
1344+
std r8, 88(r1)
1345+
std r7, 80(r1)
1346+
std r6, 72(r1)
1347+
std r5, 64(r1)
1348+
std r4, 56(r1)
1349+
std r3, 48(r1)
1350+
1351+
/* Save callee's TOC in the ABI compliant location */
1352+
std r2, 24(r1)
1353+
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
1354+
1355+
mfctr r4 /* ftrace_caller has moved local addr here */
1356+
std r4, 40(r1)
1357+
mflr r3 /* ftrace_caller has restored LR from stack */
1358+
subi r4, r4, MCOUNT_INSN_SIZE
1359+
1360+
bl prepare_ftrace_return
1361+
nop
1362+
1363+
/*
1364+
* prepare_ftrace_return gives us the address we divert to.
1365+
* Change the LR to this.
1366+
*/
1367+
mtlr r3
1368+
1369+
ld r0, 40(r1)
1370+
mtctr r0
1371+
ld r10, 104(r1)
1372+
ld r9, 96(r1)
1373+
ld r8, 88(r1)
1374+
ld r7, 80(r1)
1375+
ld r6, 72(r1)
1376+
ld r5, 64(r1)
1377+
ld r4, 56(r1)
1378+
ld r3, 48(r1)
1379+
1380+
/* Restore callee's TOC */
1381+
ld r2, 24(r1)
1382+
1383+
addi r1, r1, 112
1384+
mflr r0
1385+
std r0, LRSAVE(r1)
1386+
bctr
1387+
#endif /* CC_USING_MPROFILE_KERNEL */
1388+
12251389
_GLOBAL(return_to_handler)
12261390
/* need to save return values */
12271391
std r4, -32(r1)

arch/powerpc/kernel/ftrace.c

Lines changed: 87 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
6161
return -EFAULT;
6262

6363
/* Make sure it is what we expect it to be */
64-
if (replaced != old)
64+
if (replaced != old) {
65+
pr_err("%p: replaced (%#x) != old (%#x)",
66+
(void *)ip, replaced, old);
6567
return -EINVAL;
68+
}
6669

6770
/* replace the text with the new text */
6871
if (patch_instruction((unsigned int *)ip, new))
@@ -108,11 +111,13 @@ __ftrace_make_nop(struct module *mod,
108111
{
109112
unsigned long entry, ptr, tramp;
110113
unsigned long ip = rec->ip;
111-
unsigned int op;
114+
unsigned int op, pop;
112115

113116
/* read where this goes */
114-
if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
117+
if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
118+
pr_err("Fetching opcode failed.\n");
115119
return -EFAULT;
120+
}
116121

117122
/* Make sure that this is still a 24bit jump */
118123
if (!is_bl_op(op)) {
@@ -152,10 +157,42 @@ __ftrace_make_nop(struct module *mod,
152157
*
153158
* Use a b +8 to jump over the load.
154159
*/
155-
op = 0x48000008; /* b +8 */
156160

157-
if (patch_instruction((unsigned int *)ip, op))
161+
pop = PPC_INST_BRANCH | 8; /* b +8 */
162+
163+
/*
164+
* Check what is in the next instruction. We can see ld r2,40(r1), but
165+
* on first pass after boot we will see mflr r0.
166+
*/
167+
if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) {
168+
pr_err("Fetching op failed.\n");
169+
return -EFAULT;
170+
}
171+
172+
if (op != PPC_INST_LD_TOC) {
173+
unsigned int inst;
174+
175+
if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
176+
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
177+
return -EFAULT;
178+
}
179+
180+
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
181+
if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
182+
pr_err("Unexpected instructions around bl _mcount\n"
183+
"when enabling dynamic ftrace!\t"
184+
"(%08x,bl,%08x)\n", inst, op);
185+
return -EINVAL;
186+
}
187+
188+
/* When using -mprofile-kernel there is no load to jump over */
189+
pop = PPC_INST_NOP;
190+
}
191+
192+
if (patch_instruction((unsigned int *)ip, pop)) {
193+
pr_err("Patching NOP failed.\n");
158194
return -EPERM;
195+
}
159196

160197
return 0;
161198
}
@@ -281,16 +318,15 @@ int ftrace_make_nop(struct module *mod,
281318

282319
#ifdef CONFIG_MODULES
283320
#ifdef CONFIG_PPC64
321+
/*
322+
* Examine the existing instructions for __ftrace_make_call.
323+
* They should effectively be a NOP, and follow formal constraints,
324+
* depending on the ABI. Return false if they don't.
325+
*/
326+
#ifndef CC_USING_MPROFILE_KERNEL
284327
static int
285-
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
328+
expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
286329
{
287-
unsigned int op[2];
288-
void *ip = (void *)rec->ip;
289-
290-
/* read where this goes */
291-
if (probe_kernel_read(op, ip, sizeof(op)))
292-
return -EFAULT;
293-
294330
/*
295331
* We expect to see:
296332
*
@@ -300,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
300336
* The load offset is different depending on the ABI. For simplicity
301337
* just mask it out when doing the compare.
302338
*/
303-
if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
304-
pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
339+
if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000))
340+
return 0;
341+
return 1;
342+
}
343+
#else
344+
static int
345+
expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1)
346+
{
347+
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
348+
if (op0 != PPC_INST_NOP)
349+
return 0;
350+
return 1;
351+
}
352+
#endif
353+
354+
static int
355+
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
356+
{
357+
unsigned int op[2];
358+
void *ip = (void *)rec->ip;
359+
360+
/* read where this goes */
361+
if (probe_kernel_read(op, ip, sizeof(op)))
362+
return -EFAULT;
363+
364+
if (!expected_nop_sequence(ip, op[0], op[1])) {
365+
pr_err("Unexpected call sequence at %p: %x %x\n",
366+
ip, op[0], op[1]);
305367
return -EINVAL;
306368
}
307369

@@ -324,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
324386

325387
return 0;
326388
}
327-
#else
389+
390+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
391+
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
392+
unsigned long addr)
393+
{
394+
return ftrace_make_call(rec, addr);
395+
}
396+
#endif
397+
398+
#else /* !CONFIG_PPC64: */
328399
static int
329400
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
330401
{

0 commit comments

Comments
 (0)