|
Borislav Petkov |
24132f |
From: Peter Zijlstra <peterz@infradead.org>
|
|
Borislav Petkov |
24132f |
Date: Tue, 14 Jun 2022 23:15:58 +0200
|
|
Borislav Petkov |
24132f |
Subject: intel_idle: Disable IBRS during long idle
|
|
Borislav Petkov |
24132f |
Git-commit: bf5835bcdb9635c97f85120dba9bfa21e111130f
|
|
Borislav Petkov |
d06c64 |
Patch-mainline: v5.19-rc4
|
|
Borislav Petkov |
24132f |
References: bsc#1199657 CVE-2022-29900 CVE-2022-29901
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
Having IBRS enabled while the SMT sibling is idle unnecessarily slows
|
|
Borislav Petkov |
24132f |
down the running sibling. On the other hand, disabling IBRS around idle takes two
|
|
Borislav Petkov |
24132f |
MSR writes, which will increase the idle latency.
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
Therefore, only disable IBRS around deeper idle states. Shallow idle
|
|
Borislav Petkov |
24132f |
states are bounded by the tick in duration, since NOHZ is not allowed
|
|
Borislav Petkov |
24132f |
for them by virtue of their short target residency.
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
Only do this for mwait-driven idle, since that keeps interrupts disabled
|
|
Borislav Petkov |
24132f |
across idle, which makes disabling IBRS vs IRQ-entry a non-issue.
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
Note: C6 is an arbitrary threshold; most importantly, C1 probably shouldn't
|
|
Borislav Petkov |
24132f |
disable IBRS; benchmarking is needed.
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
Suggested-by: Tim Chen <tim.c.chen@linux.intel.com>
|
|
Borislav Petkov |
24132f |
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
|
Borislav Petkov |
24132f |
Signed-off-by: Borislav Petkov <bp@suse.de>
|
|
Borislav Petkov |
24132f |
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
|
|
Borislav Petkov |
24132f |
Signed-off-by: Borislav Petkov <bp@suse.de>
|
|
Borislav Petkov |
24132f |
---
|
|
Borislav Petkov |
24132f |
arch/x86/include/asm/nospec-branch.h | 1
|
|
Borislav Petkov |
24132f |
arch/x86/kernel/cpu/bugs.c | 6 +++
|
|
Borislav Petkov |
24132f |
drivers/idle/intel_idle.c | 64 +++++++++++++++++++++++++++++++----
|
|
Borislav Petkov |
24132f |
3 files changed, 65 insertions(+), 6 deletions(-)
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
--- a/arch/x86/include/asm/nospec-branch.h
|
|
Borislav Petkov |
24132f |
+++ b/arch/x86/include/asm/nospec-branch.h
|
|
Borislav Petkov |
24132f |
@@ -345,6 +345,7 @@ static inline void unrestrict_branch_spe
|
|
Borislav Petkov |
24132f |
/* The Intel SPEC CTRL MSR base value cache */
|
|
Borislav Petkov |
24132f |
extern u64 x86_spec_ctrl_base;
|
|
Borislav Petkov |
24132f |
extern void write_spec_ctrl_current(u64 val, bool force);
|
|
Borislav Petkov |
24132f |
+extern u64 spec_ctrl_current(void);
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
/*
|
|
Borislav Petkov |
24132f |
* With retpoline, we must use IBRS to restrict branch prediction
|
|
Borislav Petkov |
24132f |
--- a/arch/x86/kernel/cpu/bugs.c
|
|
Borislav Petkov |
24132f |
+++ b/arch/x86/kernel/cpu/bugs.c
|
|
Borislav Petkov |
24132f |
@@ -76,6 +76,12 @@ void write_spec_ctrl_current(u64 val, bo
|
|
Borislav Petkov |
24132f |
wrmsrl(MSR_IA32_SPEC_CTRL, val);
|
|
Borislav Petkov |
24132f |
}
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
+u64 spec_ctrl_current(void)
|
|
Borislav Petkov |
24132f |
+{
|
|
Borislav Petkov |
24132f |
+ return this_cpu_read(x86_spec_ctrl_current);
|
|
Borislav Petkov |
24132f |
+}
|
|
Borislav Petkov |
24132f |
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
/*
|
|
Borislav Petkov |
24132f |
* The vendor and possibly platform specific bits which can be modified in
|
|
Borislav Petkov |
24132f |
* x86_spec_ctrl_base.
|
|
Borislav Petkov |
24132f |
--- a/drivers/idle/intel_idle.c
|
|
Borislav Petkov |
24132f |
+++ b/drivers/idle/intel_idle.c
|
|
Borislav Petkov |
24132f |
@@ -56,11 +56,13 @@
|
|
Borislav Petkov |
24132f |
#include <linux/tick.h>
|
|
Borislav Petkov |
24132f |
#include <trace/events/power.h>
|
|
Borislav Petkov |
24132f |
#include <linux/sched.h>
|
|
Borislav Petkov |
24132f |
+#include <linux/sched/smt.h>
|
|
Borislav Petkov |
24132f |
#include <linux/notifier.h>
|
|
Borislav Petkov |
24132f |
#include <linux/cpu.h>
|
|
Borislav Petkov |
24132f |
#include <linux/moduleparam.h>
|
|
Borislav Petkov |
24132f |
#include <asm/cpu_device_id.h>
|
|
Borislav Petkov |
24132f |
#include <asm/intel-family.h>
|
|
Borislav Petkov |
24132f |
+#include <asm/nospec-branch.h>
|
|
Borislav Petkov |
24132f |
#include <asm/mwait.h>
|
|
Borislav Petkov |
24132f |
#include <asm/msr.h>
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
@@ -101,6 +103,12 @@ static void intel_idle_freeze(struct cpu
|
|
Borislav Petkov |
24132f |
static struct cpuidle_state *cpuidle_state_table;
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
/*
|
|
Borislav Petkov |
24132f |
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
|
|
Borislav Petkov |
24132f |
+ * above.
|
|
Borislav Petkov |
24132f |
+ */
|
|
Borislav Petkov |
24132f |
+#define CPUIDLE_FLAG_IBRS BIT(16)
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+/*
|
|
Borislav Petkov |
24132f |
* Set this flag for states where the HW flushes the TLB for us
|
|
Borislav Petkov |
24132f |
* and so we don't need cross-calls to keep it consistent.
|
|
Borislav Petkov |
24132f |
* If this flag is set, SW flushes the TLB, so even if the
|
|
Borislav Petkov |
24132f |
@@ -616,7 +624,7 @@ static struct cpuidle_state skl_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C6",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x20",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 85,
|
|
Borislav Petkov |
24132f |
.target_residency = 200,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -624,7 +632,7 @@ static struct cpuidle_state skl_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C7s",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x33",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 124,
|
|
Borislav Petkov |
24132f |
.target_residency = 800,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -632,7 +640,7 @@ static struct cpuidle_state skl_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C8",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x40",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 200,
|
|
Borislav Petkov |
24132f |
.target_residency = 800,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -640,7 +648,7 @@ static struct cpuidle_state skl_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C9",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x50",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 480,
|
|
Borislav Petkov |
24132f |
.target_residency = 5000,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -648,7 +656,7 @@ static struct cpuidle_state skl_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C10",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x60",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 890,
|
|
Borislav Petkov |
24132f |
.target_residency = 5000,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -677,7 +685,7 @@ static struct cpuidle_state skx_cstates[
|
|
Borislav Petkov |
24132f |
{
|
|
Borislav Petkov |
24132f |
.name = "C6",
|
|
Borislav Petkov |
24132f |
.desc = "MWAIT 0x20",
|
|
Borislav Petkov |
24132f |
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
|
Borislav Petkov |
24132f |
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
|
|
Borislav Petkov |
24132f |
.exit_latency = 133,
|
|
Borislav Petkov |
24132f |
.target_residency = 600,
|
|
Borislav Petkov |
24132f |
.enter = &intel_idle,
|
|
Borislav Petkov |
24132f |
@@ -934,6 +942,46 @@ static __cpuidle int intel_idle(struct c
|
|
Borislav Petkov |
24132f |
return index;
|
|
Borislav Petkov |
24132f |
}
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
+/*
|
|
Borislav Petkov |
24132f |
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
|
|
Borislav Petkov |
24132f |
+ * the C-state (top nibble) and sub-state (bottom nibble)
|
|
Borislav Petkov |
24132f |
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
|
|
Borislav Petkov |
24132f |
+ *
|
|
Borislav Petkov |
24132f |
+ * We store the hint at the top of our "flags" for each state.
|
|
Borislav Petkov |
24132f |
+ */
|
|
Borislav Petkov |
24132f |
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
|
|
Borislav Petkov |
24132f |
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+static __always_inline int __intel_idle(struct cpuidle_device *dev,
|
|
Borislav Petkov |
24132f |
+ struct cpuidle_driver *drv, int index)
|
|
Borislav Petkov |
24132f |
+{
|
|
Borislav Petkov |
24132f |
+ struct cpuidle_state *state = &drv->states[index];
|
|
Borislav Petkov |
24132f |
+ unsigned long eax = flg2MWAIT(state->flags);
|
|
Borislav Petkov |
24132f |
+ unsigned long ecx = 1; /* break on interrupt flag */
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ mwait_idle_with_hints(eax, ecx);
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ return index;
|
|
Borislav Petkov |
24132f |
+}
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
|
|
Borislav Petkov |
24132f |
+ struct cpuidle_driver *drv, int index)
|
|
Borislav Petkov |
24132f |
+{
|
|
Borislav Petkov |
24132f |
+ bool smt_active = sched_smt_active();
|
|
Borislav Petkov |
24132f |
+ u64 spec_ctrl = spec_ctrl_current();
|
|
Borislav Petkov |
24132f |
+ int ret;
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ if (smt_active)
|
|
Borislav Petkov |
24132f |
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ ret = __intel_idle(dev, drv, index);
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ if (smt_active)
|
|
Borislav Petkov |
24132f |
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
+ return ret;
|
|
Borislav Petkov |
24132f |
+}
|
|
Borislav Petkov |
24132f |
+
|
|
Borislav Petkov |
24132f |
/**
|
|
Borislav Petkov |
24132f |
* intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle
|
|
Borislav Petkov |
24132f |
* @dev: cpuidle_device
|
|
Borislav Petkov |
24132f |
@@ -1366,6 +1414,10 @@ static void __init intel_idle_cpuidle_dr
|
|
Borislav Petkov |
24132f |
continue;
|
|
Borislav Petkov |
24132f |
}
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
727875 |
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
|
|
Borislav Petkov |
24132f |
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
|
|
Borislav Petkov |
24132f |
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
|
|
Borislav Petkov |
24132f |
+ }
|
|
Borislav Petkov |
24132f |
|
|
Borislav Petkov |
24132f |
if (((mwait_cstate + 1) > 2) &&
|
|
Borislav Petkov |
24132f |
!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
|