From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 14:54:41 -0700
Subject: x86/mm: Randomize per-cpu entry area
Git-commit: 97e3d26b5e5f371b3ee223d94dd123e6c442ba80
Patch-mainline: v6.2-rc1
References: bsc#1207845 CVE-2023-0597

Seth found that the CPU-entry-area, the piece of per-cpu data that is
mapped into the userspace page-tables for kPTI, is not subject to any
randomization -- irrespective of kASLR settings.

On x86_64 a whole P4D (512 GB) of virtual address space is reserved for
this structure, which is plenty large enough to randomize things a
little.

As such, use a straightforward randomization scheme that avoids
duplicates to spread the existing CPUs over the available space.
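
Concretely, each CPU gets a randomly chosen slot within that space, so
the per-CPU entry area address becomes, roughly (as in the
get_cpu_entry_area() hunk below):

	/* cea_offset(cpu) is a random slot index chosen once at boot */
	unsigned long va = CPU_ENTRY_AREA_PER_CPU +
			   cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;

instead of the previous fixed cpu * CPU_ENTRY_AREA_SIZE placement.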

  [ bp: Fix le build. ]

Reported-by: Seth Jenkins <sethjenkins@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
[mkoutny: v5.14 backport: init_cea_offsets() is called way before the
	  prandom_init_early() initcall, so prandom_u32_max() is not properly
	  seeded yet. Use the KASLR seed and a local prandom state to generate
	  the CPU entry area offsets (sketched below). This is based on the
	  approach in kernel_randomize_memory() and should provide the same
	  randomness guarantees -- beware that the offsets are not
	  cryptographically secure. This reduces the effective entropy for
	  exfiltrating *all* CPU entry areas by log2(nr_cpus) bits; the
	  entropy for *any single* CPU is unaffected.
	  This was chosen instead of backporting f62384995e4c ("random: split
	  initialization into early step and later step") and the related
	  crng reworks.]
[mkoutny: v5.3 backport: dropped hw_breakpoint hunk without 24ae0c91cbc5
	  ("x86/hw_breakpoint: Prevent data breakpoints on cpu_entry_area"),
	  adjusted context for missing doublefault_stack on 32b]
[mkoutny: v4.12 backport: adjust context for missing 7623f37e4111
	  ("x86/cpu_entry_area: Provide exception stack accessor")]
Acked-by: Michal Koutný <mkoutny@suse.com>
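
A rough sketch of how the backported init_cea_offsets() (below) draws
the per-CPU slot indices from a local prandom state seeded with the
kASLR seed:

	struct rnd_state rand_state;
	unsigned int max_cea, rand, cea;

	/* number of available slots in the reserved area */
	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;

	/* seed a local generator from the kASLR entropy sources */
	prandom_seed_state(&rand_state, kaslr_get_random_long("CPU entry"));

	/* one draw per CPU; duplicate slots are rejected and redrawn */
	prandom_bytes_state(&rand_state, &rand, sizeof(rand));
	cea = rand % max_cea;
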
---
 arch/x86/include/asm/cpu_entry_area.h |   10 +++++--
 arch/x86/mm/cpu_entry_area.c          |   47 +++++++++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 4 deletions(-)

--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -56,7 +56,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 
@@ -68,8 +67,13 @@ extern void cea_set_pte(void *cea_vaddr,
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE			\
-	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE		P4D_SIZE
+#endif
 
 extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
 
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -2,6 +2,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
+#include <linux/random.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
@@ -13,11 +14,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struc
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	struct rnd_state rand_state;
+	unsigned int max_cea, rand;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+	prandom_seed_state(&rand_state, kaslr_get_random_long("CPU entry"));
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
+		cea = rand % max_cea;
+
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
 	return (struct cpu_entry_area *) va;
@@ -158,6 +201,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;
 
+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();
 
 	for_each_possible_cpu(cpu)