
Commit c3d6324

Peter Zijlstra authored and Alexei Starovoitov committed
x86/alternatives: Teach text_poke_bp() to emulate instructions
In preparation for static_call and variable size jump_label support, teach
text_poke_bp() to emulate instructions, namely:

  JMP32, JMP8, CALL, NOP2, NOP_ATOMIC5, INT3

The current text_poke_bp() takes a @handler argument which is used as a jump
target when the temporary INT3 is hit by a different CPU. When patching CALL
instructions, this doesn't work because we'd miss the PUSH of the return
address. Instead, teach poke_int3_handler() to emulate an instruction,
typically the instruction we're patching in.

This fits almost all text_poke_bp() users, except arch_unoptimize_kprobe()
which restores random text, and for that site we have to build an explicit
emulate instruction.

Tested-by: Alexei Starovoitov <[email protected]>
Tested-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Masami Hiramatsu <[email protected]>
Reviewed-by: Daniel Bristot de Oliveira <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
(cherry picked from commit 8c7eebc10687af45ac8e40ad1bac0cf7893dba9f)
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 808c9f7 commit c3d6324
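
For orientation before the diffs, here is a minimal caller-side sketch of the reworked interface. It is illustrative only (the helper name and buffer are made up), but text_poke_bp(), CALL_INSN_OPCODE and CALL_INSN_SIZE are the ones added or changed below.

/*
 * Sketch of a hypothetical caller: install "CALL target" at addr on a
 * live kernel.  Passing NULL as the new @emulate argument means
 * "emulate the instruction being patched in", so a CPU that hits the
 * transient INT3 gets the return-address PUSH plus the jump -- which a
 * plain jump to the old @handler detour could not provide.  The caller
 * is expected to hold text_mutex.
 */
static void example_patch_call(void *addr, void *target)
{
	u8 insn[CALL_INSN_SIZE];

	insn[0] = CALL_INSN_OPCODE;	/* 0xE8 */
	*(s32 *)&insn[1] = (s32)((long)target - ((long)addr + CALL_INSN_SIZE));

	text_poke_bp(addr, insn, CALL_INSN_SIZE, NULL);
}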

File tree: 4 files changed, +130 −46 lines

arch/x86/include/asm/text-patching.h

Lines changed: 18 additions & 6 deletions
@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define POKE_MAX_OPCODE_SIZE	5
 
 struct text_poke_loc {
-	void *detour;
 	void *addr;
-	size_t len;
-	const char opcode[POKE_MAX_OPCODE_SIZE];
+	int len;
+	s32 rel32;
+	u8 opcode;
+	const u8 text[POKE_MAX_OPCODE_SIZE];
 };
 
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
 extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+			       const void *opcode, size_t len, const void *emulate);
 extern int after_bootmem;
 extern __ro_after_init struct mm_struct *poking_mm;
 extern __ro_after_init unsigned long poking_addr;
@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
 	regs->ip = ip;
 }
 
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE		1
+#define INT3_INSN_OPCODE	0xCC
+
+#define CALL_INSN_SIZE		5
+#define CALL_INSN_OPCODE	0xE8
+
+#define JMP32_INSN_SIZE		5
+#define JMP32_INSN_OPCODE	0xE9
+
+#define JMP8_INSN_SIZE		2
+#define JMP8_INSN_OPCODE	0xEB
 
 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 {
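
The alternative.c hunks below call int3_emulate_call(), whose body is outside this diff; only int3_emulate_jmp() and int3_emulate_push() appear in the context above. As a hedged sketch, such a helper has to push the return address and then jump, roughly as follows (the in-tree definition may differ in detail):

/* Sketch: emulate "CALL func" from the #BP handler.  regs->ip points
 * just past the 1-byte INT3, so the return address is that location
 * rewound by INT3_INSN_SIZE and advanced by the full CALL_INSN_SIZE. */
static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
{
	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
	int3_emulate_jmp(regs, func);
}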

arch/x86/kernel/alternative.c

Lines changed: 101 additions & 31 deletions
@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
 int poke_int3_handler(struct pt_regs *regs)
 {
 	struct text_poke_loc *tp;
-	unsigned char int3 = 0xcc;
 	void *ip;
 
 	/*
 	 * Having observed our INT3 instruction, we now must observe
 	 * bp_patching.nr_entries.
 	 *
-	 * 	nr_entries != 0			INT3
-	 * 	WMB				RMB
-	 * 	write INT3 if (nr_entries)
+	 *	nr_entries != 0			INT3
+	 *	WMB				RMB
+	 *	write INT3 if (nr_entries)
 	 *
 	 * Idem for other elements in bp_patching.
 	 */
@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs)
 		return 0;
 
 	/*
-	 * Discount the sizeof(int3). See text_poke_bp_batch().
+	 * Discount the INT3. See text_poke_bp_batch().
 	 */
-	ip = (void *) regs->ip - sizeof(int3);
+	ip = (void *) regs->ip - INT3_INSN_SIZE;
 
 	/*
 	 * Skip the binary search if there is a single member in the vector.
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs)
 			return 0;
 	}
 
-	/* set up the specified breakpoint detour */
-	regs->ip = (unsigned long) tp->detour;
+	ip += tp->len;
+
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		/*
+		 * Someone poked an explicit INT3, they'll want to handle it,
+		 * do not consume.
+		 */
+		return 0;
+
+	case CALL_INSN_OPCODE:
+		int3_emulate_call(regs, (long)ip + tp->rel32);
+		break;
+
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		int3_emulate_jmp(regs, (long)ip + tp->rel32);
+		break;
+
+	default:
+		BUG();
+	}
 
 	return 1;
 }
@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
  * synchronization using int3 breakpoint.
 *
 * The way it is done:
- * - For each entry in the vector:
+ *	- For each entry in the vector:
 *		- add a int3 trap to the address that will be patched
 *	- sync cores
 *	- For each entry in the vector:
@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
 */
 void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
-	int patched_all_but_first = 0;
-	unsigned char int3 = 0xcc;
+	unsigned char int3 = INT3_INSN_OPCODE;
 	unsigned int i;
+	int do_sync;
 
 	lockdep_assert_held(&text_mutex);
 
@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	/*
 	 * Second step: update all but the first byte of the patched range.
 	 */
-	for (i = 0; i < nr_entries; i++) {
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
 		if (tp[i].len - sizeof(int3) > 0) {
 			text_poke((char *)tp[i].addr + sizeof(int3),
-				  (const char *)tp[i].opcode + sizeof(int3),
+				  (const char *)tp[i].text + sizeof(int3),
 				  tp[i].len - sizeof(int3));
-			patched_all_but_first++;
+			do_sync++;
 		}
 	}
 
-	if (patched_all_but_first) {
+	if (do_sync) {
 		/*
 		 * According to Intel, this core syncing is very likely
 		 * not necessary and we'd be safe even without it. But
@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	 * Third step: replace the first byte (int3) by the first byte of
 	 * replacing opcode.
 	 */
-	for (i = 0; i < nr_entries; i++)
-		text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
+		if (tp[i].text[0] == INT3_INSN_OPCODE)
+			continue;
+
+		text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+		do_sync++;
+	}
+
+	if (do_sync)
+		on_each_cpu(do_sync_core, NULL, 1);
 
-	on_each_cpu(do_sync_core, NULL, 1);
 	/*
 	 * sync_core() implies an smp_mb() and orders this store against
 	 * the writing of the new instruction.
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 	bp_patching.nr_entries = 0;
 }
 
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+			const void *opcode, size_t len, const void *emulate)
+{
+	struct insn insn;
+
+	if (!opcode)
+		opcode = (void *)tp->text;
+	else
+		memcpy((void *)tp->text, opcode, len);
+
+	if (!emulate)
+		emulate = opcode;
+
+	kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+	insn_get_length(&insn);
+
+	BUG_ON(!insn_complete(&insn));
+	BUG_ON(len != insn.length);
+
+	tp->addr = addr;
+	tp->len = len;
+	tp->opcode = insn.opcode.bytes[0];
+
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		break;
+
+	case CALL_INSN_OPCODE:
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		tp->rel32 = insn.immediate.value;
+		break;
+
+	default: /* assume NOP */
+		switch (len) {
+		case 2: /* NOP2 -- emulate as JMP8+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[len], len));
+			tp->opcode = JMP8_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		case 5: /* NOP5 -- emulate as JMP32+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+			tp->opcode = JMP32_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		default: /* unknown instruction */
+			BUG();
+		}
+		break;
+	}
+}
+
 /**
  * text_poke_bp() -- update instructions on live kernel on SMP
  * @addr:	address to patch
@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 * dynamically allocated memory. This function should be used when it is
 * not possible to allocate memory.
 */
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
 {
-	struct text_poke_loc tp = {
-		.detour = handler,
-		.addr = addr,
-		.len = len,
-	};
-
-	if (len > POKE_MAX_OPCODE_SIZE) {
-		WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
-		return;
-	}
-
-	memcpy((void *)tp.opcode, opcode, len);
+	struct text_poke_loc tp;
 
+	text_poke_loc_init(&tp, addr, opcode, len, emulate);
 	text_poke_bp_batch(&tp, 1);
 }
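
text_poke_loc_init() together with text_poke_bp_batch() forms the batching interface that the jump_label change below queues into. A usage sketch with made-up names and a fixed-size vector (assumptions, not code from this commit):

/* Sketch: patch several sites with a single INT3 round-trip.  The
 * caller must hold text_mutex (see the lockdep_assert_held() above),
 * and the vector should be ordered by address, since poke_int3_handler()
 * binary-searches it when more than one entry is pending. */
static struct text_poke_loc example_vec[8];

static void example_patch_sites(void **addr, const void **insn,
				size_t *len, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr && i < ARRAY_SIZE(example_vec); i++)
		text_poke_loc_init(&example_vec[i], addr[i], insn[i], len[i],
				   NULL /* emulate the new instruction */);

	text_poke_bp_batch(example_vec, i);
}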

arch/x86/kernel/jump_label.c

Lines changed: 3 additions & 6 deletions
@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
-		     (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 	}
 
 	__jump_label_set_jump_code(entry, type,
-				   (union jump_code_union *) &tp->opcode, 0);
+				   (union jump_code_union *)&tp->text, 0);
 
-	tp->addr = entry_code;
-	tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
-	tp->len = JUMP_LABEL_NOP_SIZE;
+	text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
 
 	tp_vec_nr++;
 

arch/x86/kernel/kprobes/opt.c

Lines changed: 8 additions & 3 deletions
@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
 		insn_buff[0] = RELATIVEJUMP_OPCODE;
 		*(s32 *)(&insn_buff[1]) = rel;
 
-		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-			     op->optinsn.insn);
+		text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
 
 		list_del_init(&op->list);
 	}
@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist)
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	u8 insn_buff[RELATIVEJUMP_SIZE];
+	u8 emulate_buff[RELATIVEJUMP_SIZE];
 
 	/* Set int3 to first byte for kprobes */
 	insn_buff[0] = BREAKPOINT_INSTRUCTION;
 	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+	emulate_buff[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
 	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     op->optinsn.insn);
+		     emulate_buff);
 }
 
 /*
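
A note on the hunk above: insn_buff (an INT3 followed by the saved instruction tail) is not a single decodable instruction, so the default "emulate what is being written" behaviour cannot be used here. The explicit emulate_buff instead describes a JMP32 to op->optinsn.insn, which matches the relative jump that arch_optimize_kprobes() installs, so a CPU hitting the transient INT3 keeps executing the optimized slot until the restore completes. A hedged restatement with a made-up helper name:

/* Sketch: build a "JMP32 to target" template that text_poke_loc_init()
 * can decode, for a site whose newly poked bytes are not themselves one
 * emulatable instruction.  rel32 is measured from the end of the
 * 5-byte jump, as usual for x86 near jumps. */
static void example_make_jmp32(u8 buf[RELATIVEJUMP_SIZE], void *site, void *target)
{
	buf[0] = RELATIVEJUMP_OPCODE;	/* 0xE9 */
	*(s32 *)&buf[1] = (s32)((long)target - ((long)site + RELATIVEJUMP_SIZE));
}

With such a helper, the emulate_buff setup above would read as example_make_jmp32(emulate_buff, op->kp.addr, op->optinsn.insn).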
