Skip to content

Commit 98a96f2

Browse files
committed
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar: "Further simplifications and improvements to the VDSO code, by Andy Lutomirski" * 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86_64/vsyscall: Fix warn_bad_vsyscall log output x86/vdso: Set VM_MAYREAD for the vvar vma x86, vdso: Get rid of the fake section mechanism x86, vdso: Move the vvar area before the vdso text
2 parents 5637a2a + 53b884a commit 98a96f2

File tree

8 files changed

+193
-291
lines changed

8 files changed

+193
-291
lines changed

arch/x86/include/asm/vdso.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ struct vdso_image {
1818

1919
unsigned long alt, alt_len;
2020

21-
unsigned long sym_end_mapping; /* Total size of the mapping */
22-
23-
unsigned long sym_vvar_page;
24-
unsigned long sym_hpet_page;
25-
unsigned long sym_VDSO32_NOTE_MASK;
26-
unsigned long sym___kernel_sigreturn;
27-
unsigned long sym___kernel_rt_sigreturn;
28-
unsigned long sym___kernel_vsyscall;
29-
unsigned long sym_VDSO32_SYSENTER_RETURN;
21+
long sym_vvar_start; /* Negative offset to the vvar area */
22+
23+
long sym_vvar_page;
24+
long sym_hpet_page;
25+
long sym_VDSO32_NOTE_MASK;
26+
long sym___kernel_sigreturn;
27+
long sym___kernel_rt_sigreturn;
28+
long sym___kernel_vsyscall;
29+
long sym_VDSO32_SYSENTER_RETURN;
3030
};
3131

3232
#ifdef CONFIG_X86_64

arch/x86/kernel/vsyscall_64.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
8181
if (!show_unhandled_signals)
8282
return;
8383

84-
pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
85-
level, current->comm, task_pid_nr(current),
86-
message, regs->ip, regs->cs,
87-
regs->sp, regs->ax, regs->si, regs->di);
84+
printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
85+
level, current->comm, task_pid_nr(current),
86+
message, regs->ip, regs->cs,
87+
regs->sp, regs->ax, regs->si, regs->di);
8888
}
8989

9090
static int addr_to_vsyscall_nr(unsigned long addr)

arch/x86/vdso/Makefile

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ VDSO32-$(CONFIG_X86_32) := y
1010
VDSO32-$(CONFIG_COMPAT) := y
1111

1212
# files to link into the vdso
13-
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
13+
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
1414

1515
# files to link into kernel
1616
obj-y += vma.o
@@ -37,7 +37,8 @@ vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
3737
obj-y += $(vdso_img_objs)
3838
targets += $(vdso_img_cfiles)
3939
targets += $(vdso_img_sodbg)
40-
.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c)
40+
.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
41+
$(vdso_img-y:%=$(obj)/vdso%.so)
4142

4243
export CPPFLAGS_vdso.lds += -P -C
4344

@@ -54,10 +55,10 @@ hostprogs-y += vdso2c
5455

5556
quiet_cmd_vdso2c = VDSO2C $@
5657
define cmd_vdso2c
57-
$(obj)/vdso2c $< $@
58+
$(obj)/vdso2c $< $(<:%.dbg=%) $@
5859
endef
5960

60-
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
61+
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
6162
$(call if_changed,vdso2c)
6263

6364
#
@@ -113,6 +114,10 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
113114

114115
targets += vdsox32.lds $(vobjx32s-y)
115116

117+
$(obj)/%.so: OBJCOPYFLAGS := -S
118+
$(obj)/%.so: $(obj)/%.so.dbg
119+
$(call if_changed,objcopy)
120+
116121
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
117122
$(call if_changed,vdso)
118123

@@ -134,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
134139

135140
targets += vdso32/vdso32.lds
136141
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
137-
targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o
142+
targets += vdso32/vclock_gettime.o
138143

139144
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
140145

@@ -156,7 +161,6 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
156161
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
157162
$(obj)/vdso32/vdso32.lds \
158163
$(obj)/vdso32/vclock_gettime.o \
159-
$(obj)/vdso32/vdso-fakesections.o \
160164
$(obj)/vdso32/note.o \
161165
$(obj)/vdso32/%.o
162166
$(call if_changed,vdso)

arch/x86/vdso/vdso-fakesections.c

Lines changed: 0 additions & 21 deletions
This file was deleted.

arch/x86/vdso/vdso-layout.lds.S

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,25 @@
1818

1919
SECTIONS
2020
{
21+
/*
22+
* User/kernel shared data is before the vDSO. This may be a little
23+
* uglier than putting it after the vDSO, but it avoids issues with
24+
* non-allocatable things that dangle past the end of the PT_LOAD
25+
* segment.
26+
*/
27+
28+
vvar_start = . - 2 * PAGE_SIZE;
29+
vvar_page = vvar_start;
30+
31+
/* Place all vvars at the offsets in asm/vvar.h. */
32+
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
33+
#define __VVAR_KERNEL_LDS
34+
#include <asm/vvar.h>
35+
#undef __VVAR_KERNEL_LDS
36+
#undef EMIT_VVAR
37+
38+
hpet_page = vvar_start + PAGE_SIZE;
39+
2140
. = SIZEOF_HEADERS;
2241

2342
.hash : { *(.hash) } :text
@@ -74,31 +93,6 @@ SECTIONS
7493
.altinstructions : { *(.altinstructions) } :text
7594
.altinstr_replacement : { *(.altinstr_replacement) } :text
7695

77-
/*
78-
* The remainder of the vDSO consists of special pages that are
79-
* shared between the kernel and userspace. It needs to be at the
80-
* end so that it doesn't overlap the mapping of the actual
81-
* vDSO image.
82-
*/
83-
84-
. = ALIGN(PAGE_SIZE);
85-
vvar_page = .;
86-
87-
/* Place all vvars at the offsets in asm/vvar.h. */
88-
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
89-
#define __VVAR_KERNEL_LDS
90-
#include <asm/vvar.h>
91-
#undef __VVAR_KERNEL_LDS
92-
#undef EMIT_VVAR
93-
94-
. = vvar_page + PAGE_SIZE;
95-
96-
hpet_page = .;
97-
. = . + PAGE_SIZE;
98-
99-
. = ALIGN(PAGE_SIZE);
100-
end_mapping = .;
101-
10296
/DISCARD/ : {
10397
*(.discard)
10498
*(.discard.*)

arch/x86/vdso/vdso2c.c

Lines changed: 98 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,53 @@
1+
/*
2+
* vdso2c - A vdso image preparation tool
3+
* Copyright (c) 2014 Andy Lutomirski and others
4+
* Licensed under the GPL v2
5+
*
6+
* vdso2c requires stripped and unstripped input. It would be trivial
7+
* to fully strip the input in here, but, for reasons described below,
8+
* we need to write a section table. Doing this is more or less
9+
* equivalent to dropping all non-allocatable sections, but it's
10+
* easier to let objcopy handle that instead of doing it ourselves.
11+
* If we ever need to do something fancier than what objcopy provides,
12+
* it would be straightforward to add here.
13+
*
14+
* We keep a section table for a few reasons:
15+
*
16+
* The Go runtime had a couple of bugs: it would read the section
17+
* table to try to figure out how many dynamic symbols there were (it
18+
* shouldn't have looked at the section table at all) and, if there
19+
* were no SHT_DYNSYM section table entry, it would use an
20+
* uninitialized value for the number of symbols. An empty DYNSYM
21+
* table would work, but I see no reason not to write a valid one (and
22+
* keep full performance for old Go programs). This hack is only
23+
* needed on x86_64.
24+
*
25+
* The bug was introduced on 2012-08-31 by:
26+
* https://code.google.com/p/go/source/detail?r=56ea40aac72b
27+
* and was fixed on 2014-06-13 by:
28+
* https://code.google.com/p/go/source/detail?r=fc1cd5e12595
29+
*
30+
* Binutils has issues debugging the vDSO: it reads the section table to
31+
* find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
32+
* would break build-id if we removed the section table. Binutils
33+
* also requires that shstrndx != 0. See:
34+
* https://sourceware.org/bugzilla/show_bug.cgi?id=17064
35+
*
36+
* elfutils might not look for PT_NOTE if there is a section table at
37+
* all. I don't know whether this matters for any practical purpose.
38+
*
39+
* For simplicity, rather than hacking up a partial section table, we
40+
* just write a mostly complete one. We omit non-dynamic symbols,
41+
* though, since they're rather large.
42+
*
43+
* Once binutils gets fixed, we might be able to drop this for all but
44+
* the 64-bit vdso, since build-id only works in kernel RPMs, and
45+
* systems that update to new enough kernel RPMs will likely update
46+
* binutils in sync. build-id has never worked for home-built kernel
47+
* RPMs without manual symlinking, and I suspect that no one ever does
48+
* that.
49+
*/
50+
151
#include <inttypes.h>
252
#include <stdint.h>
353
#include <unistd.h>
@@ -20,9 +70,9 @@ const char *outfilename;
2070

2171
/* Symbols that we need in vdso2c. */
2272
enum {
73+
sym_vvar_start,
2374
sym_vvar_page,
2475
sym_hpet_page,
25-
sym_end_mapping,
2676
sym_VDSO_FAKE_SECTION_TABLE_START,
2777
sym_VDSO_FAKE_SECTION_TABLE_END,
2878
};
@@ -38,9 +88,9 @@ struct vdso_sym {
3888
};
3989

4090
struct vdso_sym required_syms[] = {
91+
[sym_vvar_start] = {"vvar_start", true},
4192
[sym_vvar_page] = {"vvar_page", true},
4293
[sym_hpet_page] = {"hpet_page", true},
43-
[sym_end_mapping] = {"end_mapping", true},
4494
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
4595
"VDSO_FAKE_SECTION_TABLE_START", false
4696
},
@@ -61,7 +111,8 @@ static void fail(const char *format, ...)
61111
va_start(ap, format);
62112
fprintf(stderr, "Error: ");
63113
vfprintf(stderr, format, ap);
64-
unlink(outfilename);
114+
if (outfilename)
115+
unlink(outfilename);
65116
exit(1);
66117
va_end(ap);
67118
}
@@ -96,9 +147,11 @@ extern void bad_put_le(void);
96147

97148
#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
98149

99-
#define BITSFUNC3(name, bits) name##bits
100-
#define BITSFUNC2(name, bits) BITSFUNC3(name, bits)
101-
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS)
150+
#define BITSFUNC3(name, bits, suffix) name##bits##suffix
151+
#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
152+
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
153+
154+
#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
102155

103156
#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
104157
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
@@ -112,38 +165,61 @@ extern void bad_put_le(void);
112165
#include "vdso2c.h"
113166
#undef ELF_BITS
114167

115-
static void go(void *addr, size_t len, FILE *outfile, const char *name)
168+
static void go(void *raw_addr, size_t raw_len,
169+
void *stripped_addr, size_t stripped_len,
170+
FILE *outfile, const char *name)
116171
{
117-
Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr;
172+
Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
118173

119174
if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
120-
go64(addr, len, outfile, name);
175+
go64(raw_addr, raw_len, stripped_addr, stripped_len,
176+
outfile, name);
121177
} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
122-
go32(addr, len, outfile, name);
178+
go32(raw_addr, raw_len, stripped_addr, stripped_len,
179+
outfile, name);
123180
} else {
124181
fail("unknown ELF class\n");
125182
}
126183
}
127184

185+
static void map_input(const char *name, void **addr, size_t *len, int prot)
186+
{
187+
off_t tmp_len;
188+
189+
int fd = open(name, O_RDONLY);
190+
if (fd == -1)
191+
err(1, "%s", name);
192+
193+
tmp_len = lseek(fd, 0, SEEK_END);
194+
if (tmp_len == (off_t)-1)
195+
err(1, "lseek");
196+
*len = (size_t)tmp_len;
197+
198+
*addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
199+
if (*addr == MAP_FAILED)
200+
err(1, "mmap");
201+
202+
close(fd);
203+
}
204+
128205
int main(int argc, char **argv)
129206
{
130-
int fd;
131-
off_t len;
132-
void *addr;
207+
size_t raw_len, stripped_len;
208+
void *raw_addr, *stripped_addr;
133209
FILE *outfile;
134210
char *name, *tmp;
135211
int namelen;
136212

137-
if (argc != 3) {
138-
printf("Usage: vdso2c INPUT OUTPUT\n");
213+
if (argc != 4) {
214+
printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
139215
return 1;
140216
}
141217

142218
/*
143219
* Figure out the struct name. If we're writing to a .so file,
144220
* generate raw output instead.
145221
*/
146-
name = strdup(argv[2]);
222+
name = strdup(argv[3]);
147223
namelen = strlen(name);
148224
if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
149225
name = NULL;
@@ -159,26 +235,18 @@ int main(int argc, char **argv)
159235
*tmp = '_';
160236
}
161237

162-
fd = open(argv[1], O_RDONLY);
163-
if (fd == -1)
164-
err(1, "%s", argv[1]);
165-
166-
len = lseek(fd, 0, SEEK_END);
167-
if (len == (off_t)-1)
168-
err(1, "lseek");
169-
170-
addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
171-
if (addr == MAP_FAILED)
172-
err(1, "mmap");
238+
map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
239+
map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
173240

174-
outfilename = argv[2];
241+
outfilename = argv[3];
175242
outfile = fopen(outfilename, "w");
176243
if (!outfile)
177244
err(1, "%s", argv[2]);
178245

179-
go(addr, (size_t)len, outfile, name);
246+
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
180247

181-
munmap(addr, len);
248+
munmap(raw_addr, raw_len);
249+
munmap(stripped_addr, stripped_len);
182250
fclose(outfile);
183251

184252
return 0;

0 commit comments

Comments
 (0)