Commit d7a5a18

Merge branch 'x86-tsc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-tsc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86: Check tsc available/disabled in the delayed init function
  x86: Improve TSC calibration using a delayed workqueue
  x86: Make tsc=reliable override boot time stability checks
2 parents 4f00b90 + a8760ec commit d7a5a18

2 files changed: +96 / -9 lines


Documentation/kernel-parameters.txt

Lines changed: 5 additions & 4 deletions
@@ -2461,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			to facilitate early boot debugging.
 			See also Documentation/trace/events.txt

-	tsc=		Disable clocksource-must-verify flag for TSC.
+	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
 			[x86] reliable: mark tsc clocksource as reliable, this
-			disables clocksource verification at runtime.
-			Used to enable high-resolution timer mode on older
-			hardware, and in virtualized environment.
+			disables clocksource verification at runtime, as well
+			as the stability checks done at bootup. Used to enable
+			high-resolution timer mode on older hardware, and in
+			virtualized environment.
 			[x86] noirqtime: Do not use TSC to do irq accounting.
 			Used to run time disable IRQ_TIME_ACCOUNTING on any
 			platforms where RDTSC is slow and this accounting
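A minimal usage sketch of the documented option (boot-loader syntax, kernel image name, and root device are illustrative placeholders, not part of this commit): with a GRUB-legacy style entry, the parameter is simply appended to the kernel command line:

    kernel /boot/vmlinuz-2.6.37 root=/dev/sda1 ro tsc=reliable

Booting with tsc=reliable then skips both the runtime clocksource verification and, with this change, the boot-time stability checks described above.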

arch/x86/kernel/tsc.c

Lines changed: 91 additions & 5 deletions
@@ -872,30 +872,117 @@ __cpuinit int unsynchronized_tsc(void)
 
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
 	 */
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}
 
-	return tsc_unstable;
+	return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This functions uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomolies (too many SMIs, etc),
+ * or the refined calibration is off by 1% of the fast early
+ * calibration, we throw out the new calibration and use the
+ * early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !ref_start && !ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+					(unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
 {
+	if (!cpu_has_tsc || tsc_disabled > 0)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
 	if (check_tsc_unstable()) {
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
+/*
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {

@@ -949,6 +1036,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }
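The heart of the change above is a self-re-arming delayed work: init_tsc_clocksource() queues tsc_irqwork immediately, the first expiry only records the start sample and re-schedules itself one second (HZ jiffies) later, and the second expiry takes the stop sample and computes the refined frequency. The fragment below is a minimal, out-of-tree sketch of that same delayed-workqueue pattern; the file name, function names, and printed message are invented for illustration and are not part of the commit.

/* delayed_work_sketch.c - illustrative only, not from this commit */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void sample_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(sample_work, sample_work_fn);

static void sample_work_fn(struct work_struct *work)
{
        static bool started;

        if (!started) {
                /*
                 * First expiry: timers are known to be running now, so
                 * take the "start" sample here and re-arm the work for
                 * roughly one second later.
                 */
                started = true;
                schedule_delayed_work(&sample_work, HZ);
                return;
        }

        /* Second expiry: take the "stop" sample and finish up. */
        pr_info("delayed-work sketch: measurement window closed\n");
}

static int __init sketch_init(void)
{
        /* Queue the work immediately; it re-arms itself exactly once. */
        schedule_delayed_work(&sample_work, 0);
        return 0;
}

static void __exit sketch_exit(void)
{
        cancel_delayed_work_sync(&sample_work);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");

The same structure explains why tsc_refine_calibration_work() returns early while tsc_start == -1: that first pass exists only to take the start sample and re-arm the work once timers are reliable. As a concrete reading of the 1% guard (numbers illustrative): with an early calibration of tsc_khz = 2400000 (a 2.4 GHz TSC), tsc_khz/100 is 24000 kHz, so the refined frequency is kept only if it lies within 24 MHz of the early result; otherwise the code falls through to the out: label and registers the clocksource with the early value.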
