From c716a18bce8a97553f5714230e16a4fd52e3d1d1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Apr 20 2023 11:00:56 +0000 Subject: thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR mask bits (bsc#1012628). --- diff --git a/patches.kernel.org/6.2.12-110-thermal-intel-Avoid-updating-unsupported-THERM.patch b/patches.kernel.org/6.2.12-110-thermal-intel-Avoid-updating-unsupported-THERM.patch new file mode 100644 index 0000000..2fdd44c --- /dev/null +++ b/patches.kernel.org/6.2.12-110-thermal-intel-Avoid-updating-unsupported-THERM.patch @@ -0,0 +1,144 @@ +From: Srinivas Pandruvada +Date: Mon, 10 Apr 2023 10:35:01 -0700 +Subject: [PATCH] thermal: intel: Avoid updating unsupported THERM_STATUS_CLEAR + mask bits +References: bsc#1012628 +Patch-mainline: 6.2.12 +Git-commit: 117e4e5bd9d47b89777dbf6b37a709dcfe59520f + +commit 117e4e5bd9d47b89777dbf6b37a709dcfe59520f upstream. + +Some older processors don't allow BIT(13) and BIT(15) in the current +mask set by "THERM_STATUS_CLEAR_CORE_MASK". This results in: + +unchecked MSR access error: WRMSR to 0x19c (tried to +write 0x000000000000aaa8) at rIP: 0xffffffff816f66a6 +(throttle_active_work+0xa6/0x1d0) + +To avoid unchecked MSR issues, check CPUID for each relevant feature and +use that information to set the supported feature bits only in the +"clear" mask for cores. Do the same for the analogous package mask set +by "THERM_STATUS_CLEAR_PKG_MASK". + +Introduce functions thermal_intr_init_core_clear_mask() and +thermal_intr_init_pkg_clear_mask() to set core and package mask bits, +respectively. These functions are called during initialization. + +Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status") +Reported-by: Rui Salvaterra +Link: https://lore.kernel.org/lkml/cdf43fb423368ee3994124a9e8c9b4f8d00712c6.camel@linux.intel.com/T/ +Tested-by: Rui Salvaterra +Signed-off-by: Srinivas Pandruvada +Cc: 6.2+ # 6.2+ +[ rjw: Renamed 2 funtions and 2 static variables, edited subject and + changelog ] +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Jiri Slaby +--- + drivers/thermal/intel/therm_throt.c | 73 ++++++++++++++++++++++++++--- + 1 file changed, 66 insertions(+), 7 deletions(-) + +diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c +index 2e22bb82..e69868e8 100644 +--- a/drivers/thermal/intel/therm_throt.c ++++ b/drivers/thermal/intel/therm_throt.c +@@ -193,8 +193,67 @@ static const struct attribute_group thermal_attr_group = { + #define THERM_THROT_POLL_INTERVAL HZ + #define THERM_STATUS_PROCHOT_LOG BIT(1) + +-#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) +-#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) ++static u64 therm_intr_core_clear_mask; ++static u64 therm_intr_pkg_clear_mask; ++ ++static void thermal_intr_init_core_clear_mask(void) ++{ ++ if (therm_intr_core_clear_mask) ++ return; ++ ++ /* ++ * Reference: Intel SDM Volume 4 ++ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C ++ * IA32_THERM_STATUS. ++ */ ++ ++ /* ++ * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not ++ * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI. ++ */ ++ therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5)); ++ ++ /* ++ * Bit 7 and 9: Thermal Threshold #1 and #2 log ++ * If CPUID.01H:ECX[8] = 1 ++ */ ++ if (boot_cpu_has(X86_FEATURE_TM2)) ++ therm_intr_core_clear_mask |= (BIT(7) | BIT(9)); ++ ++ /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */ ++ if (boot_cpu_has(X86_FEATURE_PLN)) ++ therm_intr_core_clear_mask |= BIT(11); ++ ++ /* ++ * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 ++ * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1 ++ */ ++ if (boot_cpu_has(X86_FEATURE_HWP)) ++ therm_intr_core_clear_mask |= (BIT(13) | BIT(15)); ++} ++ ++static void thermal_intr_init_pkg_clear_mask(void) ++{ ++ if (therm_intr_pkg_clear_mask) ++ return; ++ ++ /* ++ * Reference: Intel SDM Volume 4 ++ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1 ++ * IA32_PACKAGE_THERM_STATUS. ++ */ ++ ++ /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */ ++ if (boot_cpu_has(X86_FEATURE_PTS)) ++ therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)); ++ ++ /* ++ * Intel SDM Volume 2A: Thermal and Power Management Leaf ++ * Bit 26: CPUID.06H: EAX[19] = 1 ++ */ ++ if (boot_cpu_has(X86_FEATURE_HFI)) ++ therm_intr_pkg_clear_mask |= BIT(26); ++} + + /* + * Clear the bits in package thermal status register for bit = 1 +@@ -207,13 +266,10 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask) + + if (level == CORE_LEVEL) { + msr = MSR_IA32_THERM_STATUS; +- msr_val = THERM_STATUS_CLEAR_CORE_MASK; ++ msr_val = therm_intr_core_clear_mask; + } else { + msr = MSR_IA32_PACKAGE_THERM_STATUS; +- msr_val = THERM_STATUS_CLEAR_PKG_MASK; +- if (boot_cpu_has(X86_FEATURE_HFI)) +- msr_val |= BIT(26); +- ++ msr_val = therm_intr_pkg_clear_mask; + } + + msr_val &= ~bit_mask; +@@ -708,6 +764,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c) + h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; + apic_write(APIC_LVTTHMR, h); + ++ thermal_intr_init_core_clear_mask(); ++ thermal_intr_init_pkg_clear_mask(); ++ + rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); + if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable) + wrmsr(MSR_IA32_THERM_INTERRUPT, +-- +2.35.3 + diff --git a/series.conf b/series.conf index 57a54e4..8dc1241 100644 --- a/series.conf +++ b/series.conf @@ -2337,6 +2337,7 @@ patches.kernel.org/6.2.12-107-net-phy-nxp-c45-tja11xx-add-remove-callback.patch patches.kernel.org/6.2.12-108-net-phy-nxp-c45-tja11xx-fix-unsigned-long-mult.patch patches.kernel.org/6.2.12-109-scsi-ses-Handle-enclosure-with-just-a-primary-.patch + patches.kernel.org/6.2.12-110-thermal-intel-Avoid-updating-unsupported-THERM.patch ######################################################## # Build fixes that apply to the vanilla kernel too.