Skip to content

Commit 5cc97bf

Browse files
author
Linus Torvalds
committed
Merge branch 'xen-upstream' of ssh://master.kernel.org/pub/scm/linux/kernel/git/jeremy/xen
* 'xen-upstream' of ssh://master.kernel.org/pub/scm/linux/kernel/git/jeremy/xen: (44 commits)
  xen: disable all non-virtual drivers
  xen: use iret directly when possible
  xen: suppress abs symbol warnings for unused reloc pointers
  xen: Attempt to patch inline versions of common operations
  xen: Place vcpu_info structure into per-cpu memory
  xen: handle external requests for shutdown, reboot and sysrq
  xen: machine operations
  xen: add virtual network device driver
  xen: add virtual block device driver.
  xen: add the Xenbus sysfs and virtual device hotplug driver
  xen: Add grant table support
  xen: use the hvc console infrastructure for Xen console
  xen: hack to prevent bad segment register reload
  xen: lazy-mmu operations
  xen: Add support for preemption
  xen: SMP guest support
  xen: Implement sched_clock
  xen: Account for stolen time
  xen: ignore RW mapping of RO pages in pagetable_init
  xen: Complete pagetable pinning
  ...
2 parents 826ea8f + dfdcdd4 commit 5cc97bf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+15031
-210
lines changed

arch/i386/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ config PARAVIRT
222222
However, when run without a hypervisor the kernel is
223223
theoretically slower. If in doubt, say N.
224224

225+
source "arch/i386/xen/Kconfig"
226+
225227
config VMI
226228
bool "VMI Paravirt-ops support"
227229
depends on PARAVIRT

arch/i386/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000
9393
mcore-$(CONFIG_X86_ES7000) := mach-default
9494
core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/
9595

96+
# Xen paravirtualization support
97+
core-$(CONFIG_XEN) += arch/i386/xen/
98+
9699
# default subarch .h files
97100
mflags-y += -Iinclude/asm-i386/mach-default
98101

arch/i386/boot/compressed/relocs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ static const char* safe_abs_relocs[] = {
3131
"__kernel_rt_sigreturn",
3232
"__kernel_sigreturn",
3333
"SYSENTER_RETURN",
34+
"xen_irq_disable_direct_reloc",
35+
"xen_save_fl_direct_reloc",
3436
};
3537

3638
static int is_safe_abs_reloc(const char* sym_name)

arch/i386/kernel/asm-offsets.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <asm/thread_info.h>
1818
#include <asm/elf.h>
1919

20+
#include <xen/interface/xen.h>
21+
2022
#define DEFINE(sym, val) \
2123
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
2224

@@ -59,6 +61,7 @@ void foo(void)
5961
OFFSET(TI_addr_limit, thread_info, addr_limit);
6062
OFFSET(TI_restart_block, thread_info, restart_block);
6163
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
64+
OFFSET(TI_cpu, thread_info, cpu);
6265
BLANK();
6366

6467
OFFSET(GDS_size, Xgt_desc_struct, size);
@@ -115,4 +118,10 @@ void foo(void)
115118
OFFSET(PARAVIRT_iret, paravirt_ops, iret);
116119
OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
117120
#endif
121+
122+
#ifdef CONFIG_XEN
123+
BLANK();
124+
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
125+
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
126+
#endif
118127
}

arch/i386/kernel/entry.S

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,91 @@ ENTRY(kernel_thread_helper)
10231023
CFI_ENDPROC
10241024
ENDPROC(kernel_thread_helper)
10251025

1026+
#ifdef CONFIG_XEN
1027+
ENTRY(xen_hypervisor_callback)
1028+
CFI_STARTPROC
1029+
pushl $0
1030+
CFI_ADJUST_CFA_OFFSET 4
1031+
SAVE_ALL
1032+
TRACE_IRQS_OFF
1033+
1034+
/* Check to see if we got the event in the critical
1035+
region in xen_iret_direct, after we've reenabled
1036+
events and checked for pending events. This simulates
1037+
iret instruction's behaviour where it delivers a
1038+
pending interrupt when enabling interrupts. */
1039+
movl PT_EIP(%esp),%eax
1040+
cmpl $xen_iret_start_crit,%eax
1041+
jb 1f
1042+
cmpl $xen_iret_end_crit,%eax
1043+
jae 1f
1044+
1045+
call xen_iret_crit_fixup
1046+
1047+
1: mov %esp, %eax
1048+
call xen_evtchn_do_upcall
1049+
jmp ret_from_intr
1050+
CFI_ENDPROC
1051+
ENDPROC(xen_hypervisor_callback)
1052+
1053+
# Hypervisor uses this for application faults while it executes.
1054+
# We get here for two reasons:
1055+
# 1. Fault while reloading DS, ES, FS or GS
1056+
# 2. Fault while executing IRET
1057+
# Category 1 we fix up by reattempting the load, and zeroing the segment
1058+
# register if the load fails.
1059+
# Category 2 we fix up by jumping to do_iret_error. We cannot use the
1060+
# normal Linux return path in this case because if we use the IRET hypercall
1061+
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
1062+
# We distinguish between categories by maintaining a status value in EAX.
1063+
ENTRY(xen_failsafe_callback)
1064+
CFI_STARTPROC
1065+
pushl %eax
1066+
CFI_ADJUST_CFA_OFFSET 4
1067+
movl $1,%eax
1068+
1: mov 4(%esp),%ds
1069+
2: mov 8(%esp),%es
1070+
3: mov 12(%esp),%fs
1071+
4: mov 16(%esp),%gs
1072+
testl %eax,%eax
1073+
popl %eax
1074+
CFI_ADJUST_CFA_OFFSET -4
1075+
lea 16(%esp),%esp
1076+
CFI_ADJUST_CFA_OFFSET -16
1077+
jz 5f
1078+
addl $16,%esp
1079+
jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
1080+
5: pushl $0 # EAX == 0 => Category 1 (Bad segment)
1081+
CFI_ADJUST_CFA_OFFSET 4
1082+
SAVE_ALL
1083+
jmp ret_from_exception
1084+
CFI_ENDPROC
1085+
1086+
.section .fixup,"ax"
1087+
6: xorl %eax,%eax
1088+
movl %eax,4(%esp)
1089+
jmp 1b
1090+
7: xorl %eax,%eax
1091+
movl %eax,8(%esp)
1092+
jmp 2b
1093+
8: xorl %eax,%eax
1094+
movl %eax,12(%esp)
1095+
jmp 3b
1096+
9: xorl %eax,%eax
1097+
movl %eax,16(%esp)
1098+
jmp 4b
1099+
.previous
1100+
.section __ex_table,"a"
1101+
.align 4
1102+
.long 1b,6b
1103+
.long 2b,7b
1104+
.long 3b,8b
1105+
.long 4b,9b
1106+
.previous
1107+
ENDPROC(xen_failsafe_callback)
1108+
1109+
#endif /* CONFIG_XEN */
1110+
10261111
.section .rodata,"a"
10271112
#include "syscall_table.S"
10281113

arch/i386/kernel/head.S

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,8 @@ ENTRY(_stext)
510510
/*
511511
* BSS section
512512
*/
513-
.section ".bss.page_aligned","w"
513+
.section ".bss.page_aligned","wa"
514+
.align PAGE_SIZE_asm
514515
ENTRY(swapper_pg_dir)
515516
.fill 1024,4,0
516517
ENTRY(swapper_pg_pmd)
@@ -538,6 +539,8 @@ fault_msg:
538539
.ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
539540
.asciz "Stack: %p %p %p %p %p %p %p %p\n"
540541

542+
#include "../xen/xen-head.S"
543+
541544
/*
542545
* The IDT and GDT 'descriptors' are a strange 48-bit object
543546
* only used by the lidt and lgdt instructions. They are not

arch/i386/kernel/paravirt.c

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,41 @@ static int __init print_banner(void)
228228
}
229229
core_initcall(print_banner);
230230

231+
static struct resource reserve_ioports = {
232+
.start = 0,
233+
.end = IO_SPACE_LIMIT,
234+
.name = "paravirt-ioport",
235+
.flags = IORESOURCE_IO | IORESOURCE_BUSY,
236+
};
237+
238+
static struct resource reserve_iomem = {
239+
.start = 0,
240+
.end = -1,
241+
.name = "paravirt-iomem",
242+
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
243+
};
244+
245+
/*
246+
* Reserve the whole legacy IO space to prevent any legacy drivers
247+
* from wasting time probing for their hardware. This is a fairly
248+
* brute-force approach to disabling all non-virtual drivers.
249+
*
250+
* Note that this must be called very early to have any effect.
251+
*/
252+
int paravirt_disable_iospace(void)
253+
{
254+
int ret;
255+
256+
ret = request_resource(&ioport_resource, &reserve_ioports);
257+
if (ret == 0) {
258+
ret = request_resource(&iomem_resource, &reserve_iomem);
259+
if (ret)
260+
release_resource(&reserve_ioports);
261+
}
262+
263+
return ret;
264+
}
265+
231266
struct paravirt_ops paravirt_ops = {
232267
.name = "bare hardware",
233268
.paravirt_enabled = 0,
@@ -267,7 +302,7 @@ struct paravirt_ops paravirt_ops = {
267302
.write_msr = native_write_msr_safe,
268303
.read_tsc = native_read_tsc,
269304
.read_pmc = native_read_pmc,
270-
.get_scheduled_cycles = native_read_tsc,
305+
.sched_clock = native_sched_clock,
271306
.get_cpu_khz = native_calculate_cpu_khz,
272307
.load_tr_desc = native_load_tr_desc,
273308
.set_ldt = native_set_ldt,

arch/i386/kernel/setup.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,8 @@ void __init setup_arch(char **cmdline_p)
601601
* NOTE: at this point the bootmem allocator is fully available.
602602
*/
603603

604+
paravirt_post_allocator_init();
605+
604606
dmi_scan_machine();
605607

606608
#ifdef CONFIG_X86_GENERICARCH

arch/i386/kernel/smp.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include <asm/mtrr.h>
2424
#include <asm/tlbflush.h>
25+
#include <asm/mmu_context.h>
2526
#include <mach_apic.h>
2627

2728
/*
@@ -249,13 +250,13 @@ static unsigned long flush_va;
249250
static DEFINE_SPINLOCK(tlbstate_lock);
250251

251252
/*
252-
* We cannot call mmdrop() because we are in interrupt context,
253+
* We cannot call mmdrop() because we are in interrupt context,
253254
* instead update mm->cpu_vm_mask.
254255
*
255256
* We need to reload %cr3 since the page tables may be going
256257
* away from under us..
257258
*/
258-
static inline void leave_mm (unsigned long cpu)
259+
void leave_mm(unsigned long cpu)
259260
{
260261
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
261262
BUG();

arch/i386/kernel/smpboot.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ void __init smp_alloc_memory(void)
148148
* a given CPU
149149
*/
150150

151-
static void __cpuinit smp_store_cpu_info(int id)
151+
void __cpuinit smp_store_cpu_info(int id)
152152
{
153153
struct cpuinfo_x86 *c = cpu_data + id;
154154

@@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu)
308308
/* representing cpus for which sibling maps can be computed */
309309
static cpumask_t cpu_sibling_setup_map;
310310

311-
static inline void
312-
set_cpu_sibling_map(int cpu)
311+
void set_cpu_sibling_map(int cpu)
313312
{
314313
int i;
315314
struct cpuinfo_x86 *c = cpu_data;
@@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void)
11441143
}
11451144

11461145
#ifdef CONFIG_HOTPLUG_CPU
1147-
static void
1148-
remove_siblinginfo(int cpu)
1146+
void remove_siblinginfo(int cpu)
11491147
{
11501148
int sibling;
11511149
struct cpuinfo_x86 *c = cpu_data;

arch/i386/kernel/tsc.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ static inline int check_tsc_unstable(void)
8484
*
8585
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
8686
*/
87-
static unsigned long cyc2ns_scale __read_mostly;
87+
unsigned long cyc2ns_scale __read_mostly;
8888

8989
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
9090

@@ -93,15 +93,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
9393
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
9494
}
9595

96-
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
97-
{
98-
return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
99-
}
100-
10196
/*
10297
* Scheduler clock - returns current time in nanosec units.
10398
*/
104-
unsigned long long sched_clock(void)
99+
unsigned long long native_sched_clock(void)
105100
{
106101
unsigned long long this_offset;
107102

@@ -118,12 +113,24 @@ unsigned long long sched_clock(void)
118113
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
119114

120115
/* read the Time Stamp Counter: */
121-
get_scheduled_cycles(this_offset);
116+
rdtscll(this_offset);
122117

123118
/* return the value in ns */
124119
return cycles_2_ns(this_offset);
125120
}
126121

122+
/* We need to define a real function for sched_clock, to override the
123+
weak default version */
124+
#ifdef CONFIG_PARAVIRT
125+
unsigned long long sched_clock(void)
126+
{
127+
return paravirt_sched_clock();
128+
}
129+
#else
130+
unsigned long long sched_clock(void)
131+
__attribute__((alias("native_sched_clock")));
132+
#endif
133+
127134
unsigned long native_calculate_cpu_khz(void)
128135
{
129136
unsigned long long start, end;

arch/i386/kernel/vmi.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
362362
}
363363
#endif
364364

365-
static void vmi_allocate_pt(u32 pfn)
365+
static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
366366
{
367367
vmi_set_page_type(pfn, VMI_PAGE_L1);
368368
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
891891
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
892892
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
893893
#endif
894-
paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
894+
paravirt_ops.sched_clock = vmi_sched_clock;
895895
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
896896

897897
/* We have true wallclock functions; disable CMOS clock sync */

arch/i386/kernel/vmiclock.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now)
6464
return 0;
6565
}
6666

67-
/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
68-
unsigned long long vmi_get_sched_cycles(void)
67+
/* paravirt_ops.sched_clock = vmi_sched_clock */
68+
unsigned long long vmi_sched_clock(void)
6969
{
70-
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
70+
return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
7171
}
7272

7373
/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */

arch/i386/kernel/vmlinux.lds.S

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ SECTIONS
8888

8989
. = ALIGN(4096);
9090
.data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
91+
*(.data.page_aligned)
9192
*(.data.idt)
9293
}
9394

0 commit comments

Comments
 (0)