Commit d4b3b63

srikard authored and Ingo Molnar committed
uprobes/core: Allocate XOL slots for uprobes use
Uprobes executes the original instruction at a probed location out of
line. For this, we allocate a page (per mm) upon the first uprobe hit,
in the process's user address space, and divide it into slots that are
used to store the actual instructions to be singlestepped. These slots
are known as xol (execution out of line) slots.

Care is taken to ensure that the allocation is in an unmapped area as
close to the top of the user address space as possible, with appropriate
permission settings to keep SELinux-like frameworks happy.

Upon a uprobe hit, a free slot is acquired and is released after the
singlestep completes.

Lots of improvements courtesy of suggestions/inputs from Peter and Oleg.

[ Folded a fix for a build issue on powerpc, reported and fixed by
  Stephen Rothwell. ]

Signed-off-by: Srikar Dronamraju <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Linux-mm <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Anton Arapov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 0326f5a commit d4b3b63
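
To make the slot arithmetic in the changelog concrete, here is a small standalone sketch (not kernel code) of how many slots fit in the per-mm XOL page and where a given slot starts. The 4096-byte page and 128-byte slot size are assumptions for illustration only; the kernel derives the real values from PAGE_SIZE and the per-arch UPROBE_XOL_SLOT_BYTES, and the addresses below are invented.

/*
 * Standalone sketch of the XOL slot layout; illustrative values, not kernel code.
 */
#include <stdio.h>

#define XOL_PAGE_SIZE      4096UL   /* assumed PAGE_SIZE */
#define XOL_SLOT_BYTES      128UL   /* assumed UPROBE_XOL_SLOT_BYTES */
#define XOL_SLOTS_PER_PAGE (XOL_PAGE_SIZE / XOL_SLOT_BYTES)

int main(void)
{
        unsigned long area_vaddr = 0x7ffff7ffe000UL;   /* hypothetical area->vaddr */
        unsigned long slot_nr = 5;                      /* hypothetical free bit index */

        /* Each slot starts at area->vaddr + slot_nr * slot size. */
        unsigned long slot_addr = area_vaddr + slot_nr * XOL_SLOT_BYTES;

        printf("slots per page : %lu\n", XOL_SLOTS_PER_PAGE);      /* 32 with these values */
        printf("slot %lu starts : %#lx\n", slot_nr, slot_addr);
        return 0;
}

With these assumed values the page holds 32 slots, so that many threads can singlestep out of line concurrently before one has to wait for a slot to be freed.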

4 files changed (+253, -0)

include/linux/mm_types.h

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/page-debug-flags.h>
+#include <linux/uprobes.h>
 #include <asm/page.h>
 #include <asm/mmu.h>

@@ -388,6 +389,7 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
        struct cpumask cpumask_allocation;
 #endif
+       struct uprobes_state uprobes_state;
 };

 static inline void mm_init_cpumask(struct mm_struct *mm)

include/linux/uprobes.h

Lines changed: 34 additions & 0 deletions
@@ -28,6 +28,8 @@
 #include <linux/rbtree.h>

 struct vm_area_struct;
+struct mm_struct;
+struct inode;

 #ifdef CONFIG_ARCH_SUPPORTS_UPROBES
 # include <asm/uprobes.h>

@@ -76,6 +78,28 @@ struct uprobe_task {
        unsigned long vaddr;
 };

+/*
+ * On a breakpoint hit, thread contests for a slot.  It frees the
+ * slot after singlestep. Currently a fixed number of slots are
+ * allocated.
+ */
+struct xol_area {
+       wait_queue_head_t       wq;             /* if all slots are busy */
+       atomic_t                slot_count;     /* number of in-use slots */
+       unsigned long           *bitmap;        /* 0 = free slot */
+       struct page             *page;
+
+       /*
+        * We keep the vma's vm_start rather than a pointer to the vma
+        * itself.  The probed process or a naughty kernel module could make
+        * the vma go away, and we must handle that reasonably gracefully.
+        */
+       unsigned long           vaddr;          /* Page(s) of instruction slots */
+};
+
+struct uprobes_state {
+       struct xol_area         *xol_area;
+};
 extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);

@@ -90,7 +114,11 @@ extern int uprobe_pre_sstep_notifier(struct pt_regs *regs);
 extern void uprobe_notify_resume(struct pt_regs *regs);
 extern bool uprobe_deny_signal(void);
 extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void uprobe_clear_state(struct mm_struct *mm);
+extern void uprobe_reset_state(struct mm_struct *mm);
 #else /* !CONFIG_UPROBES */
+struct uprobes_state {
+};
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {

@@ -121,5 +149,11 @@ static inline void uprobe_free_utask(struct task_struct *t)
 static inline void uprobe_copy_process(struct task_struct *t)
 {
 }
+static inline void uprobe_clear_state(struct mm_struct *mm)
+{
+}
+static inline void uprobe_reset_state(struct mm_struct *mm)
+{
+}
 #endif /* !CONFIG_UPROBES */
 #endif /* _LINUX_UPROBES_H */
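
The comment on struct xol_area above explains that each slot is tracked by one bit in the bitmap field. For a sense of how small that bookkeeping is, the sketch below redoes the sizing arithmetic in user space; the BITS_TO_LONGS macro here is a local stand-in for the kernel's, and the 4096-byte page / 128-byte slot figures are the same illustrative assumptions used earlier.

/* Sizing sketch for the xol_area bitmap; assumed values, not kernel code. */
#include <stdio.h>

#define BITS_PER_LONG           (8 * sizeof(long))
#define BITS_TO_LONGS(nr)       (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

#define UINSNS_PER_PAGE         (4096 / 128)    /* assumed PAGE_SIZE / UPROBE_XOL_SLOT_BYTES */

int main(void)
{
        /* 32 slots fit in a single long of bitmap on a 64-bit machine. */
        printf("bitmap bytes: %zu\n", BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long));
        return 0;
}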

kernel/events/uprobes.c

Lines changed: 215 additions & 0 deletions
@@ -35,6 +35,9 @@

 #include <linux/uprobes.h>

+#define UINSNS_PER_PAGE                 (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
+#define MAX_UPROBE_XOL_SLOTS            UINSNS_PER_PAGE
+
 static struct srcu_struct uprobes_srcu;
 static struct rb_root uprobes_tree = RB_ROOT;

@@ -1042,6 +1045,213 @@ int uprobe_mmap(struct vm_area_struct *vma)
        return ret;
 }

+/* Slot allocation for XOL */
+static int xol_add_vma(struct xol_area *area)
+{
+       struct mm_struct *mm;
+       int ret;
+
+       area->page = alloc_page(GFP_HIGHUSER);
+       if (!area->page)
+               return -ENOMEM;
+
+       ret = -EALREADY;
+       mm = current->mm;
+
+       down_write(&mm->mmap_sem);
+       if (mm->uprobes_state.xol_area)
+               goto fail;
+
+       ret = -ENOMEM;
+
+       /* Try to map as high as possible, this is only a hint. */
+       area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
+       if (area->vaddr & ~PAGE_MASK) {
+               ret = area->vaddr;
+               goto fail;
+       }
+
+       ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
+                               VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
+       if (ret)
+               goto fail;
+
+       smp_wmb();      /* pairs with get_xol_area() */
+       mm->uprobes_state.xol_area = area;
+       ret = 0;
+
+ fail:
+       up_write(&mm->mmap_sem);
+       if (ret)
+               __free_page(area->page);
+
+       return ret;
+}
+
+static struct xol_area *get_xol_area(struct mm_struct *mm)
+{
+       struct xol_area *area;
+
+       area = mm->uprobes_state.xol_area;
+       smp_read_barrier_depends();     /* pairs with wmb in xol_add_vma() */
+
+       return area;
+}
+
+/*
+ * xol_alloc_area - Allocate process's xol_area.
+ * This area will be used for storing instructions for execution out of
+ * line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct xol_area *xol_alloc_area(void)
+{
+       struct xol_area *area;
+
+       area = kzalloc(sizeof(*area), GFP_KERNEL);
+       if (unlikely(!area))
+               return NULL;
+
+       area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
+
+       if (!area->bitmap)
+               goto fail;
+
+       init_waitqueue_head(&area->wq);
+       if (!xol_add_vma(area))
+               return area;
+
+ fail:
+       kfree(area->bitmap);
+       kfree(area);
+
+       return get_xol_area(current->mm);
+}
+
+/*
+ * uprobe_clear_state - Free the area allocated for slots.
+ */
+void uprobe_clear_state(struct mm_struct *mm)
+{
+       struct xol_area *area = mm->uprobes_state.xol_area;
+
+       if (!area)
+               return;
+
+       put_page(area->page);
+       kfree(area->bitmap);
+       kfree(area);
+}
+
+/*
+ * uprobe_reset_state - Free the area allocated for slots.
+ */
+void uprobe_reset_state(struct mm_struct *mm)
+{
+       mm->uprobes_state.xol_area = NULL;
+}
+
+/*
+ *  - search for a free slot.
+ */
+static unsigned long xol_take_insn_slot(struct xol_area *area)
+{
+       unsigned long slot_addr;
+       int slot_nr;
+
+       do {
+               slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
+               if (slot_nr < UINSNS_PER_PAGE) {
+                       if (!test_and_set_bit(slot_nr, area->bitmap))
+                               break;
+
+                       slot_nr = UINSNS_PER_PAGE;
+                       continue;
+               }
+               wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
+       } while (slot_nr >= UINSNS_PER_PAGE);
+
+       slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
+       atomic_inc(&area->slot_count);
+
+       return slot_addr;
+}
+
+/*
+ * xol_get_insn_slot - If was not allocated a slot, then
+ * allocate a slot.
+ * Returns the allocated slot address or 0.
+ */
+static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
+{
+       struct xol_area *area;
+       unsigned long offset;
+       void *vaddr;
+
+       area = get_xol_area(current->mm);
+       if (!area) {
+               area = xol_alloc_area();
+               if (!area)
+                       return 0;
+       }
+       current->utask->xol_vaddr = xol_take_insn_slot(area);
+
+       /*
+        * Initialize the slot if xol_vaddr points to valid
+        * instruction slot.
+        */
+       if (unlikely(!current->utask->xol_vaddr))
+               return 0;
+
+       current->utask->vaddr = slot_addr;
+       offset = current->utask->xol_vaddr & ~PAGE_MASK;
+       vaddr = kmap_atomic(area->page);
+       memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
+       kunmap_atomic(vaddr);
+
+       return current->utask->xol_vaddr;
+}
+
+/*
+ * xol_free_insn_slot - If slot was earlier allocated by
+ * @xol_get_insn_slot(), make the slot available for
+ * subsequent requests.
+ */
+static void xol_free_insn_slot(struct task_struct *tsk)
+{
+       struct xol_area *area;
+       unsigned long vma_end;
+       unsigned long slot_addr;
+
+       if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
+               return;
+
+       slot_addr = tsk->utask->xol_vaddr;
+
+       if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
+               return;
+
+       area = tsk->mm->uprobes_state.xol_area;
+       vma_end = area->vaddr + PAGE_SIZE;
+       if (area->vaddr <= slot_addr && slot_addr < vma_end) {
+               unsigned long offset;
+               int slot_nr;
+
+               offset = slot_addr - area->vaddr;
+               slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
+               if (slot_nr >= UINSNS_PER_PAGE)
+                       return;
+
+               clear_bit(slot_nr, area->bitmap);
+               atomic_dec(&area->slot_count);
+               if (waitqueue_active(&area->wq))
+                       wake_up(&area->wq);
+
+               tsk->utask->xol_vaddr = 0;
+       }
+}
+
 /**
  * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
  * @regs: Reflects the saved state of the task after it has hit a breakpoint

@@ -1070,6 +1280,7 @@ void uprobe_free_utask(struct task_struct *t)
        if (utask->active_uprobe)
                put_uprobe(utask->active_uprobe);

+       xol_free_insn_slot(t);
        kfree(utask);
        t->utask = NULL;
 }

@@ -1108,6 +1319,9 @@ static struct uprobe_task *add_utask(void)
 static int
 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
 {
+       if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
+               return 0;
+
        return -EFAULT;
 }

@@ -1252,6 +1466,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
        utask->active_uprobe = NULL;
        utask->state = UTASK_RUNNING;
        user_disable_single_step(current);
+       xol_free_insn_slot(current);

        spin_lock_irq(&current->sighand->siglock);
        recalc_sigpending(); /* see uprobe_deny_signal() */
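
The claim/release pair above (xol_take_insn_slot()/xol_free_insn_slot()) boils down to an atomic test-and-set on the slot bitmap, with the waitqueue as a fallback when every slot is taken. As a rough analogue only, here is a user-space model of that claim step using C11 atomics in place of test_and_set_bit()/clear_bit(); the plain scan stands in for the kernel's wait_event()/wake_up() pairing, and the struct and function names are invented for this sketch, not the kernel's.

/* Simplified user-space analogue of the XOL slot claim/release; illustrative only. */
#include <stdatomic.h>

#define SLOTS_PER_PAGE 32      /* assumed: 4096-byte page / 128-byte slots */

struct xol_area_model {
        atomic_ulong    bitmap;         /* one bit per slot, 0 = free */
        unsigned long   vaddr;          /* start of the slot page */
};

/* Claim the first free slot; returns its address, or 0 if the page is full. */
unsigned long take_slot(struct xol_area_model *area, unsigned long slot_bytes)
{
        for (int slot_nr = 0; slot_nr < SLOTS_PER_PAGE; slot_nr++) {
                unsigned long bit = 1UL << slot_nr;

                /* fetch_or returns the old word: bit previously clear means we won this slot */
                if (!(atomic_fetch_or(&area->bitmap, bit) & bit))
                        return area->vaddr + slot_nr * slot_bytes;
        }
        return 0;       /* page full: the kernel would sleep on area->wq here */
}

/* Release a slot so another breakpoint hit can reuse it. */
void free_slot(struct xol_area_model *area, unsigned long slot_addr, unsigned long slot_bytes)
{
        unsigned long slot_nr = (slot_addr - area->vaddr) / slot_bytes;

        atomic_fetch_and(&area->bitmap, ~(1UL << slot_nr));     /* kernel: clear_bit() + wake_up() */
}

In the kernel code the slot is taken in pre_ssout() just before the singlestep and released in handle_singlestep() (or in uprobe_free_utask() if the task exits first), so a slot is held only for the duration of one out-of-line step.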

kernel/fork.c

Lines changed: 2 additions & 0 deletions
@@ -554,6 +554,7 @@ void mmput(struct mm_struct *mm)
        might_sleep();

        if (atomic_dec_and_test(&mm->mm_users)) {
+               uprobe_clear_state(mm);
                exit_aio(mm);
                ksm_exit(mm);
                khugepaged_exit(mm); /* must run before exit_mmap */

@@ -760,6 +761,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        mm->pmd_huge_pte = NULL;
 #endif
+       uprobe_reset_state(mm);

        if (!mm_init(mm, tsk))
                goto fail_nomem;
