Skip to content

Commit 8dbc450

Browse files
committed
Merge branch 'sparc-vdso'
sparc: VDSO improvements I started out on these changes with the goal of improving perf annotations when the VDSO is in use. Due to lack of inlining the helper functions are typically hit when profiling instead of __vdso_gettimeofday() or __vdso_vclock_gettime(). The only symbols available by default are the dynamic symbols, which therefore don't cover the helper functions. So the perf output looks terrible, because the symbols cannot be resolved and all show up as "Unknown". The sparc VDSO code forces no inlining because of the way the simplistic %tick register read code patching works. So fixing that was the first order of business. Tricks were taken from how x86 implements alternatives. The crucial factor is that if you want to refer to locations (for the original and patch instruction(s)) you have to do so in a way that is resolvable at link time even for a shared object. So you have to do this by storing PC-relative values, and not in executable sections. Next, we sanitize the Makefile so that the cflags et al. make more sense. And LDFLAGS are actually applied to invocations of LD instead of CC. We also add some sanity checking, specifically in a post-link check that makes sure we don't have any unexpected unresolved symbols in the VDSO. This is essential because the dynamic linker cannot resolve symbols in the VDSO because it cannot write to it. Finally some very minor optimizations are performed on the vclock_gettime.c code. One thing which is tricky with this code on sparc is that struct timeval and struct timespec are laid out differently on 64-bit. This is because, unlike other architectures, sparc defined suseconds_t as 'int' even on 64-bit. This is why we have all of the "union tstv_t" business and the weird assignments in __vdso_gettimeofday(). 
Performance-wise we do gain some cycles here; specifically, here are cycle counts for a user application calling gettimeofday(): no-VDSO VDSO-orig VDSO-new ================================================ 64-bit 853 cycles 112 cycles 125 cycles 32-bit 849 cycles 134 cycles 141 cycles These results are with current glibc sources. To get better we'd need to implement this in assembler, and I might just do that at some point. Signed-off-by: David S. Miller <[email protected]>
2 parents 46b8306 + 19832d2 commit 8dbc450

File tree

10 files changed

+121
-107
lines changed

10 files changed

+121
-107
lines changed

arch/sparc/include/asm/thread_info_64.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,12 @@ struct thread_info {
121121
}
122122

123123
/* how to get the thread information struct from C */
124+
#ifndef BUILD_VDSO
124125
register struct thread_info *current_thread_info_reg asm("g6");
125126
#define current_thread_info() (current_thread_info_reg)
127+
#else
128+
extern struct thread_info *current_thread_info(void);
129+
#endif
126130

127131
/* thread information allocation */
128132
#if PAGE_SHIFT == 13

arch/sparc/include/asm/vdso.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
struct vdso_image {
99
void *data;
1010
unsigned long size; /* Always a multiple of PAGE_SIZE */
11+
12+
unsigned long tick_patch, tick_patch_len;
13+
1114
long sym_vvar_start; /* Negative offset to the vvar area */
12-
long sym_vread_tick; /* Start of vread_tick section */
13-
long sym_vread_tick_patch_start; /* Start of tick read */
14-
long sym_vread_tick_patch_end; /* End of tick read */
1515
};
1616

1717
#ifdef CONFIG_SPARC64

arch/sparc/kernel/time_64.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@
5353

5454
DEFINE_SPINLOCK(rtc_lock);
5555

56-
unsigned int __read_mostly vdso_fix_stick;
57-
5856
#ifdef CONFIG_SMP
5957
unsigned long profile_pc(struct pt_regs *regs)
6058
{
@@ -838,7 +836,6 @@ void __init time_init_early(void)
838836
} else {
839837
init_tick_ops(&tick_operations);
840838
clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
841-
vdso_fix_stick = 1;
842839
}
843840
} else {
844841
init_tick_ops(&stick_operations);

arch/sparc/vdso/Makefile

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,8 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
3333

3434
CPPFLAGS_vdso.lds += -P -C
3535

36-
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
37-
-Wl,--no-undefined \
38-
-Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
39-
$(DISABLE_LTO)
36+
VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 --no-undefined \
37+
-z max-page-size=8192 -z common-page-size=8192
4038

4139
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
4240
$(call if_changed,vdso)
@@ -54,13 +52,14 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
5452
# Don't omit frame pointers for ease of userspace debugging, but do
5553
# optimize sibling calls.
5654
#
57-
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
58-
-m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
59-
-ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
60-
$(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
61-
-foptimize-sibling-calls -DBUILD_VDSO
55+
CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables -m64 \
56+
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
57+
-fno-omit-frame-pointer -foptimize-sibling-calls \
58+
-DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
6259

63-
$(vobjs): KBUILD_CFLAGS += $(CFL)
60+
SPARC_REG_CFLAGS = -ffixed-g4 -ffixed-g5 -fcall-used-g5 -fcall-used-g7
61+
62+
$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
6463

6564
#
6665
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -73,7 +72,7 @@ $(obj)/%.so: $(obj)/%.so.dbg
7372
$(call if_changed,objcopy)
7473

7574
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
76-
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1
75+
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1
7776

7877
#This makes sure the $(obj) subdirectory exists even though vdso32/
7978
#is not a kbuild sub-make subdirectory
@@ -91,7 +90,8 @@ KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
9190
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
9291
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
9392
KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
94-
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7
93+
KBUILD_CFLAGS_32 := $(filter-out $(SPARC_REG_CFLAGS),$(KBUILD_CFLAGS_32))
94+
KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic
9595
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
9696
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
9797
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
@@ -109,12 +109,13 @@ $(obj)/vdso32.so.dbg: FORCE \
109109
# The DSO images are built using a special linker script.
110110
#
111111
quiet_cmd_vdso = VDSO $@
112-
cmd_vdso = $(CC) -nostdlib -o $@ \
112+
cmd_vdso = $(LD) -nostdlib -o $@ \
113113
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
114-
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
114+
-T $(filter %.lds,$^) $(filter %.o,$^) && \
115+
sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'
115116

116-
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
117-
$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
117+
VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
118+
$(call ld-option, --build-id) -Bsymbolic
118119
GCOV_PROFILE := n
119120

120121
#

arch/sparc/vdso/checkundef.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/sh
2+
objdump="$1"
3+
file="$2"
4+
$objdump -t "$file" | grep '*UUND*' | grep -v '#scratch' > /dev/null 2>&1
5+
if [ $? -eq 1 ]; then
6+
exit 0
7+
else
8+
echo "$file: undefined symbols found" >&2
9+
exit 1
10+
fi

arch/sparc/vdso/vclock_gettime.c

Lines changed: 46 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212
* Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
1313
*/
1414

15-
/* Disable profiling for userspace code: */
16-
#ifndef DISABLE_BRANCH_PROFILING
17-
#define DISABLE_BRANCH_PROFILING
18-
#endif
19-
2015
#include <linux/kernel.h>
2116
#include <linux/time.h>
2217
#include <linux/string.h>
@@ -26,13 +21,6 @@
2621
#include <asm/clocksource.h>
2722
#include <asm/vvar.h>
2823

29-
#undef TICK_PRIV_BIT
30-
#ifdef CONFIG_SPARC64
31-
#define TICK_PRIV_BIT (1UL << 63)
32-
#else
33-
#define TICK_PRIV_BIT (1ULL << 63)
34-
#endif
35-
3624
#ifdef CONFIG_SPARC64
3725
#define SYSCALL_STRING \
3826
"ta 0x6d;" \
@@ -60,24 +48,22 @@
6048
* Compute the vvar page's address in the process address space, and return it
6149
* as a pointer to the vvar_data.
6250
*/
63-
static notrace noinline struct vvar_data *
64-
get_vvar_data(void)
51+
notrace static __always_inline struct vvar_data *get_vvar_data(void)
6552
{
6653
unsigned long ret;
6754

6855
/*
69-
* vdso data page is the first vDSO page so grab the return address
56+
* vdso data page is the first vDSO page so grab the PC
7057
* and move up a page to get to the data page.
7158
*/
72-
ret = (unsigned long)__builtin_return_address(0);
59+
__asm__("rd %%pc, %0" : "=r" (ret));
7360
ret &= ~(8192 - 1);
7461
ret -= 8192;
7562

7663
return (struct vvar_data *) ret;
7764
}
7865

79-
static notrace long
80-
vdso_fallback_gettime(long clock, struct timespec *ts)
66+
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
8167
{
8268
register long num __asm__("g1") = __NR_clock_gettime;
8369
register long o0 __asm__("o0") = clock;
@@ -88,8 +74,7 @@ vdso_fallback_gettime(long clock, struct timespec *ts)
8874
return o0;
8975
}
9076

91-
static notrace __always_inline long
92-
vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
77+
notrace static long vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
9378
{
9479
register long num __asm__("g1") = __NR_gettimeofday;
9580
register long o0 __asm__("o0") = (long) tv;
@@ -101,38 +86,43 @@ vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
10186
}
10287

10388
#ifdef CONFIG_SPARC64
104-
static notrace noinline u64
105-
vread_tick(void) {
89+
notrace static __always_inline u64 vread_tick(void)
90+
{
10691
u64 ret;
10792

108-
__asm__ __volatile__("rd %%asr24, %0 \n"
109-
".section .vread_tick_patch, \"ax\" \n"
110-
"rd %%tick, %0 \n"
111-
".previous \n"
112-
: "=&r" (ret));
113-
return ret & ~TICK_PRIV_BIT;
93+
__asm__ __volatile__("1:\n\t"
94+
"rd %%tick, %0\n\t"
95+
".pushsection .tick_patch, \"a\"\n\t"
96+
".word 1b - ., 1f - .\n\t"
97+
".popsection\n\t"
98+
".pushsection .tick_patch_replacement, \"ax\"\n\t"
99+
"1:\n\t"
100+
"rd %%asr24, %0\n\t"
101+
".popsection\n"
102+
: "=r" (ret));
103+
return ret;
114104
}
115105
#else
116-
static notrace noinline u64
117-
vread_tick(void)
106+
notrace static __always_inline u64 vread_tick(void)
118107
{
119-
unsigned int lo, hi;
120-
121-
__asm__ __volatile__("rd %%asr24, %%g1\n\t"
122-
"srlx %%g1, 32, %1\n\t"
123-
"srl %%g1, 0, %0\n"
124-
".section .vread_tick_patch, \"ax\" \n"
125-
"rd %%tick, %%g1\n"
126-
".previous \n"
127-
: "=&r" (lo), "=&r" (hi)
128-
:
129-
: "g1");
130-
return lo | ((u64)hi << 32);
108+
register unsigned long long ret asm("o4");
109+
110+
__asm__ __volatile__("1:\n\t"
111+
"rd %%tick, %L0\n\t"
112+
"srlx %L0, 32, %H0\n\t"
113+
".pushsection .tick_patch, \"a\"\n\t"
114+
".word 1b - ., 1f - .\n\t"
115+
".popsection\n\t"
116+
".pushsection .tick_patch_replacement, \"ax\"\n\t"
117+
"1:\n\t"
118+
"rd %%asr24, %L0\n\t"
119+
".popsection\n"
120+
: "=r" (ret));
121+
return ret;
131122
}
132123
#endif
133124

134-
static notrace inline u64
135-
vgetsns(struct vvar_data *vvar)
125+
notrace static __always_inline u64 vgetsns(struct vvar_data *vvar)
136126
{
137127
u64 v;
138128
u64 cycles;
@@ -142,13 +132,12 @@ vgetsns(struct vvar_data *vvar)
142132
return v * vvar->clock.mult;
143133
}
144134

145-
static notrace noinline int
146-
do_realtime(struct vvar_data *vvar, struct timespec *ts)
135+
notrace static __always_inline int do_realtime(struct vvar_data *vvar,
136+
struct timespec *ts)
147137
{
148138
unsigned long seq;
149139
u64 ns;
150140

151-
ts->tv_nsec = 0;
152141
do {
153142
seq = vvar_read_begin(vvar);
154143
ts->tv_sec = vvar->wall_time_sec;
@@ -157,18 +146,18 @@ do_realtime(struct vvar_data *vvar, struct timespec *ts)
157146
ns >>= vvar->clock.shift;
158147
} while (unlikely(vvar_read_retry(vvar, seq)));
159148

160-
timespec_add_ns(ts, ns);
149+
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
150+
ts->tv_nsec = ns;
161151

162152
return 0;
163153
}
164154

165-
static notrace noinline int
166-
do_monotonic(struct vvar_data *vvar, struct timespec *ts)
155+
notrace static __always_inline int do_monotonic(struct vvar_data *vvar,
156+
struct timespec *ts)
167157
{
168158
unsigned long seq;
169159
u64 ns;
170160

171-
ts->tv_nsec = 0;
172161
do {
173162
seq = vvar_read_begin(vvar);
174163
ts->tv_sec = vvar->monotonic_time_sec;
@@ -177,13 +166,14 @@ do_monotonic(struct vvar_data *vvar, struct timespec *ts)
177166
ns >>= vvar->clock.shift;
178167
} while (unlikely(vvar_read_retry(vvar, seq)));
179168

180-
timespec_add_ns(ts, ns);
169+
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
170+
ts->tv_nsec = ns;
181171

182172
return 0;
183173
}
184174

185-
static notrace noinline int
186-
do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
175+
notrace static int do_realtime_coarse(struct vvar_data *vvar,
176+
struct timespec *ts)
187177
{
188178
unsigned long seq;
189179

@@ -195,8 +185,8 @@ do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
195185
return 0;
196186
}
197187

198-
static notrace noinline int
199-
do_monotonic_coarse(struct vvar_data *vvar, struct timespec *ts)
188+
notrace static int do_monotonic_coarse(struct vvar_data *vvar,
189+
struct timespec *ts)
200190
{
201191
unsigned long seq;
202192

arch/sparc/vdso/vdso-layout.lds.S

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,8 @@ SECTIONS
7373

7474
.text : { *(.text*) } :text =0x90909090,
7575

76-
.vread_tick_patch : {
77-
vread_tick_patch_start = .;
78-
*(.vread_tick_patch)
79-
vread_tick_patch_end = .;
80-
}
76+
.tick_patch : { *(.tick_patch) } :text
77+
.tick_patch_insns : { *(.tick_patch_insns) } :text
8178

8279
/DISCARD/ : {
8380
*(.discard)

arch/sparc/vdso/vdso2c.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,6 @@ enum {
6363
sym_vvar_start,
6464
sym_VDSO_FAKE_SECTION_TABLE_START,
6565
sym_VDSO_FAKE_SECTION_TABLE_END,
66-
sym_vread_tick,
67-
sym_vread_tick_patch_start,
68-
sym_vread_tick_patch_end
6966
};
7067

7168
struct vdso_sym {
@@ -81,9 +78,6 @@ struct vdso_sym required_syms[] = {
8178
[sym_VDSO_FAKE_SECTION_TABLE_END] = {
8279
"VDSO_FAKE_SECTION_TABLE_END", 0
8380
},
84-
[sym_vread_tick] = {"vread_tick", 1},
85-
[sym_vread_tick_patch_start] = {"vread_tick_patch_start", 1},
86-
[sym_vread_tick_patch_end] = {"vread_tick_patch_end", 1}
8781
};
8882

8983
__attribute__((format(printf, 1, 2))) __attribute__((noreturn))

0 commit comments

Comments
 (0)