Skip to content

Commit 6e3cd95

Browse files
committed
x86/hpet: Use another crystalball to evaluate HPET usability
On recent Intel systems the HPET stops working when the system reaches PC10 idle state. The approach of adding PCI ids to the early quirks to disable HPET on these systems is a whack-a-mole game which makes no sense. Check for PC10 instead and force disable HPET if supported. The check is overbroad as it does not take ACPI, intel_idle enablement and command line parameters into account. That's fine as long as there is at least PMTIMER available to calibrate the TSC frequency. The decision can be overruled by adding "hpet=force" on the kernel command line. Remove the related early PCI quirks for affected Ice Cake and Coffin Lake systems as they are no longer required. That should also cover all other systems, i.e. Tiger Rag and newer generations, which are most likely affected by this as well. Fixes: Yet another hardware trainwreck Reported-by: Jakub Kicinski <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Jakub Kicinski <[email protected]> Reviewed-by: Rafael J. Wysocki <[email protected]> Cc: [email protected] Cc: Kai-Heng Feng <[email protected]> Cc: Bjorn Helgaas <[email protected]>
1 parent 06f2ac3 commit 6e3cd95

File tree

2 files changed

+81
-6
lines changed

2 files changed

+81
-6
lines changed

arch/x86/kernel/early-quirks.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -714,12 +714,6 @@ static struct chipset early_qrk[] __initdata = {
714714
*/
715715
{ PCI_VENDOR_ID_INTEL, 0x0f00,
716716
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
717-
{ PCI_VENDOR_ID_INTEL, 0x3e20,
718-
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
719-
{ PCI_VENDOR_ID_INTEL, 0x3ec4,
720-
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
721-
{ PCI_VENDOR_ID_INTEL, 0x8a12,
722-
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
723717
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
724718
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
725719
{}

arch/x86/kernel/hpet.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <asm/irq_remapping.h>
1111
#include <asm/hpet.h>
1212
#include <asm/time.h>
13+
#include <asm/mwait.h>
1314

1415
#undef pr_fmt
1516
#define pr_fmt(fmt) "hpet: " fmt
@@ -916,6 +917,83 @@ static bool __init hpet_counting(void)
916917
return false;
917918
}
918919

920+
/*
 * Report whether the boot CPU advertises the MWAIT capabilities that this
 * file uses as its indicator for PC10 substate support.
 *
 * Returns true only when all of the following hold:
 *  - the boot CPU vendor is Intel
 *  - the MWAIT feature is enabled
 *  - the CPUID MWAIT leaf is within the supported CPUID level
 *  - that leaf reports both CPUID5_ECX_EXTENSIONS_SUPPORTED and
 *    CPUID5_ECX_INTERRUPT_BREAK in ECX, and a non-zero value in
 *    bits 31:28 of the substate word
 */
static bool __init mwait_pc10_supported(void)
{
	unsigned int eax, ebx, ecx, mwait_substates;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return false;

	if (!cpu_feature_enabled(X86_FEATURE_MWAIT))
		return false;

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return false;

	/* Only ECX and the fourth output are evaluated; EAX/EBX are unused */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	/*
	 * Build the mask from an unsigned constant: shifting the signed int
	 * 0xF left by 28 is not representable in int and is undefined
	 * behaviour in ISO C. The resulting mask value is unchanged.
	 */
	return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) &&
	       (ecx & CPUID5_ECX_INTERRUPT_BREAK) &&
	       (mwait_substates & (0xFU << 28));
}
939+
940+
/*
941+
* Check whether the system supports PC10. If so force disable HPET as that
942+
* stops counting in PC10. This check is overbroad as it does not take any
943+
* of the following into account:
944+
*
945+
* - ACPI tables
946+
* - Enablement of intel_idle
947+
* - Command line arguments which limit intel_idle C-state support
948+
*
949+
* That's perfectly fine. HPET is a piece of hardware designed by committee
950+
* and the only reason why it is still in use on modern systems is the
951+
* fact that it is impossible to reliably query TSC and CPU frequency via
952+
* CPUID or firmware.
953+
*
954+
* If HPET is functional it is useful for calibrating TSC, but this can be
955+
* done via PMTIMER as well which seems to be the last remaining timer on
956+
* X86/INTEL platforms that has not been completely wreckaged by feature
957+
* creep.
958+
*
959+
* In theory HPET support should be removed altogether, but there are older
960+
* systems out there which depend on it because TSC and APIC timer are
961+
* dysfunctional in deeper C-states.
962+
*
963+
* It's only 20 years now that hardware people have been asked to provide
964+
* reliable and discoverable facilities which can be used for timekeeping
965+
* and per CPU timer interrupts.
966+
*
967+
* The probability that this problem is going to be solved in the
968+
* foreseeable future is close to zero, so the kernel has to be cluttered
969+
* with heuristics to keep up with the ever growing amount of hardware and
970+
* firmware trainwrecks. Hopefully some day hardware people will understand
971+
* that the approach of "This can be fixed in software" is not sustainable.
972+
* Hope dies last...
973+
*/
974+
static bool __init hpet_is_pc10_damaged(void)
{
	unsigned long long pkg_cst_cfg;

	/* No PC10 substates advertised: nothing to work around */
	if (!mwait_pc10_supported())
		return false;

	/*
	 * The low four bits of MSR_PKG_CST_CONFIG_CONTROL are compared
	 * against 8 to decide whether PC10 is enabled in the PKG C-state
	 * limit. The MSR is read before the command line override is looked
	 * at, so the override only matters when PC10 is actually enabled.
	 */
	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pkg_cst_cfg);

	if ((pkg_cst_cfg & 0xF) >= 8) {
		if (!hpet_force_user) {
			/* Default policy: take HPET out of service */
			pr_info("HPET dysfunctional in PC10. Force disabled.\n");
			boot_hpet_disable = true;
			return true;
		}

		/* The user insisted on HPET: warn, but leave it enabled */
		pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n");
	}

	return false;
}
996+
919997
/**
920998
* hpet_enable - Try to setup the HPET timer. Returns 1 on success.
921999
*/
@@ -929,6 +1007,9 @@ int __init hpet_enable(void)
9291007
if (!is_hpet_capable())
9301008
return 0;
9311009

1010+
if (hpet_is_pc10_damaged())
1011+
return 0;
1012+
9321013
hpet_set_mapping();
9331014
if (!hpet_virt_address)
9341015
return 0;

0 commit comments

Comments
 (0)