Borislav Petkov 758e87
From: Peter Zijlstra <peterz@infradead.org>
Borislav Petkov 758e87
Date: Tue, 26 Oct 2021 14:01:42 +0200
Borislav Petkov 758e87
Subject: x86/alternative: Implement .retpoline_sites support
Borislav Petkov 758e87
Git-commit: 7508500900814d14e2e085cdc4e28142721abbdf
Borislav Petkov 758e87
Patch-mainline: v5.16-rc1
Borislav Petkov 758e87
References: bsc#1190497
Borislav Petkov 758e87
Borislav Petkov 758e87
Rewrite retpoline thunk call sites to be indirect calls for
Borislav Petkov 758e87
spectre_v2=off. This ensures spectre_v2=off is as near to a
Borislav Petkov 758e87
RETPOLINE=n build as possible.
Borislav Petkov 758e87
Borislav Petkov 758e87
This is the replacement for objtool writing alternative entries to
Borislav Petkov 758e87
ensure the same and achieves feature-parity with the previous
Borislav Petkov 758e87
approach.
Borislav Petkov 758e87
Borislav Petkov 758e87
One noteworthy feature is that it relies on the thunks to be in
Borislav Petkov 758e87
machine order to compute the register index.
Borislav Petkov 758e87
Borislav Petkov 758e87
Specifically, this does not yet address the Jcc __x86_indirect_thunk_*
Borislav Petkov 758e87
calls generated by clang; a future patch will add this.
Borislav Petkov 758e87
Borislav Petkov 758e87
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Borislav Petkov 758e87
Reviewed-by: Borislav Petkov <bp@suse.de>
Borislav Petkov 758e87
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Borislav Petkov 758e87
Tested-by: Alexei Starovoitov <ast@kernel.org>
Borislav Petkov 758e87
Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org
Borislav Petkov 758e87
Borislav Petkov 758e87
Acked-by: Borislav Petkov <bp@suse.de>
Borislav Petkov 758e87
---
Borislav Petkov 758e87
 arch/um/kernel/um_arch.c           |   4 ++
Borislav Petkov 758e87
 arch/x86/include/asm/alternative.h |   1 +
Borislav Petkov 758e87
 arch/x86/kernel/alternative.c      | 141 +++++++++++++++++++++++++++++++++++--
Borislav Petkov 758e87
 arch/x86/kernel/module.c           |   9 ++-
Borislav Petkov 758e87
 4 files changed, 150 insertions(+), 5 deletions(-)
Borislav Petkov 758e87
Borislav Petkov 758e87
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
Borislav Petkov 758e87
index a149a5e9a16a..54447690de11 100644
Borislav Petkov 758e87
--- a/arch/um/kernel/um_arch.c
Borislav Petkov 758e87
+++ b/arch/um/kernel/um_arch.c
Borislav Petkov 758e87
@@ -421,6 +421,10 @@ void __init check_bugs(void)
Borislav Petkov 758e87
 	os_check_bugs();
Borislav Petkov 758e87
 }
Borislav Petkov 758e87
 
Borislav Petkov 758e87
+void apply_retpolines(s32 *start, s32 *end)
Borislav Petkov 758e87
+{
Borislav Petkov 758e87
+}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
Borislav Petkov 758e87
 {
Borislav Petkov 758e87
 }
Borislav Petkov 758e87
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
Borislav Petkov 758e87
index a3c2315aca12..58eee6402832 100644
Borislav Petkov 758e87
--- a/arch/x86/include/asm/alternative.h
Borislav Petkov 758e87
+++ b/arch/x86/include/asm/alternative.h
Borislav Petkov 758e87
@@ -75,6 +75,7 @@ extern int alternatives_patched;
Borislav Petkov 758e87
 
Borislav Petkov 758e87
 extern void alternative_instructions(void);
Borislav Petkov 758e87
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
Borislav Petkov 758e87
+extern void apply_retpolines(s32 *start, s32 *end);
Borislav Petkov 758e87
 
Borislav Petkov 758e87
 struct module;
Borislav Petkov 758e87
 
Borislav Petkov 758e87
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
Borislav Petkov 758e87
index e9da3dc71254..5df403450359 100644
Borislav Petkov 758e87
--- a/arch/x86/kernel/alternative.c
Borislav Petkov 758e87
+++ b/arch/x86/kernel/alternative.c
Borislav Petkov 758e87
@@ -29,6 +29,7 @@
Borislav Petkov 758e87
 #include <asm/io.h>
Borislav Petkov 758e87
 #include <asm/fixmap.h>
Borislav Petkov 758e87
 #include <asm/paravirt.h>
Borislav Petkov 758e87
+#include <asm/asm-prototypes.h>
Borislav Petkov 758e87
 
Borislav Petkov 758e87
 int __read_mostly alternatives_patched;
Borislav Petkov 758e87
 
Borislav Petkov 758e87
@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
Borislav Petkov 758e87
 	}
Borislav Petkov 758e87
 }
Borislav Petkov 758e87
 
Borislav Petkov 758e87
+extern s32 __retpoline_sites[], __retpoline_sites_end[];
Borislav Petkov 758e87
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
Borislav Petkov 758e87
 extern s32 __smp_locks[], __smp_locks_end[];
Borislav Petkov 758e87
 void text_poke_early(void *addr, const void *opcode, size_t len);
Borislav Petkov 758e87
@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
Borislav Petkov 758e87
  * "noinline" to cause control flow change and thus invalidate I$ and
Borislav Petkov 758e87
  * cause refetch after modification.
Borislav Petkov 758e87
  */
Borislav Petkov 758e87
-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
Borislav Petkov 758e87
+static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
Borislav Petkov 758e87
 {
Borislav Petkov 758e87
 	struct insn insn;
Borislav Petkov 758e87
 	int i = 0;
Borislav Petkov 758e87
@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
Borislav Petkov 758e87
 		 * optimized.
Borislav Petkov 758e87
 		 */
Borislav Petkov 758e87
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
Borislav Petkov 758e87
-			i += optimize_nops_range(instr, a->instrlen, i);
Borislav Petkov 758e87
+			i += optimize_nops_range(instr, len, i);
Borislav Petkov 758e87
 		else
Borislav Petkov 758e87
 			i += insn.length;
Borislav Petkov 758e87
 
Borislav Petkov 758e87
-		if (i >= a->instrlen)
Borislav Petkov 758e87
+		if (i >= len)
Borislav Petkov 758e87
 			return;
Borislav Petkov 758e87
 	}
Borislav Petkov 758e87
 }
Borislav Petkov 758e87
@@ -331,10 +333,135 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
Borislav Petkov 758e87
 		text_poke_early(instr, insn_buff, insn_buff_sz);
Borislav Petkov 758e87
 
Borislav Petkov 758e87
 next:
Borislav Petkov 758e87
-		optimize_nops(a, instr);
Borislav Petkov 758e87
+		optimize_nops(instr, a->instrlen);
Borislav Petkov 758e87
 	}
Borislav Petkov 758e87
 }
Borislav Petkov 758e87
 
Borislav Petkov 758e87
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+/*
Borislav Petkov 758e87
+ * CALL/JMP *%\reg
Borislav Petkov 758e87
+ */
Borislav Petkov 758e87
+static int emit_indirect(int op, int reg, u8 *bytes)
Borislav Petkov 758e87
+{
Borislav Petkov 758e87
+	int i = 0;
Borislav Petkov 758e87
+	u8 modrm;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	switch (op) {
Borislav Petkov 758e87
+	case CALL_INSN_OPCODE:
Borislav Petkov 758e87
+		modrm = 0x10; /* Reg = 2; CALL r/m */
Borislav Petkov 758e87
+		break;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	case JMP32_INSN_OPCODE:
Borislav Petkov 758e87
+		modrm = 0x20; /* Reg = 4; JMP r/m */
Borislav Petkov 758e87
+		break;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	default:
Borislav Petkov 758e87
+		WARN_ON_ONCE(1);
Borislav Petkov 758e87
+		return -1;
Borislav Petkov 758e87
+	}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	if (reg >= 8) {
Borislav Petkov 758e87
+		bytes[i++] = 0x41; /* REX.B prefix */
Borislav Petkov 758e87
+		reg -= 8;
Borislav Petkov 758e87
+	}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	modrm |= 0xc0; /* Mod = 3 */
Borislav Petkov 758e87
+	modrm += reg;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	bytes[i++] = 0xff; /* opcode */
Borislav Petkov 758e87
+	bytes[i++] = modrm;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	return i;
Borislav Petkov 758e87
+}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+/*
Borislav Petkov 758e87
+ * Rewrite the compiler generated retpoline thunk calls.
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
Borislav Petkov 758e87
+ * indirect instructions, avoiding the extra indirection.
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ * For example, convert:
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ *   CALL __x86_indirect_thunk_\reg
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ * into:
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ *   CALL *%\reg
Borislav Petkov 758e87
+ *
Borislav Petkov 758e87
+ */
Borislav Petkov 758e87
+static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
Borislav Petkov 758e87
+{
Borislav Petkov 758e87
+	retpoline_thunk_t *target;
Borislav Petkov 758e87
+	int reg, i = 0;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	target = addr + insn->length + insn->immediate.value;
Borislav Petkov 758e87
+	reg = target - __x86_indirect_thunk_array;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	if (WARN_ON_ONCE(reg & ~0xf))
Borislav Petkov 758e87
+		return -1;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
Borislav Petkov 758e87
+	BUG_ON(reg == 4);
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
Borislav Petkov 758e87
+		return -1;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	i = emit_indirect(insn->opcode.bytes[0], reg, bytes);
Borislav Petkov 758e87
+	if (i < 0)
Borislav Petkov 758e87
+		return i;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	for (; i < insn->length;)
Borislav Petkov 758e87
+		bytes[i++] = BYTES_NOP1;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	return i;
Borislav Petkov 758e87
+}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+/*
Borislav Petkov 758e87
+ * Generated by 'objtool --retpoline'.
Borislav Petkov 758e87
+ */
Borislav Petkov 758e87
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
Borislav Petkov 758e87
+{
Borislav Petkov 758e87
+	s32 *s;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+	for (s = start; s < end; s++) {
Borislav Petkov 758e87
+		void *addr = (void *)s + *s;
Borislav Petkov 758e87
+		struct insn insn;
Borislav Petkov 758e87
+		int len, ret;
Borislav Petkov 758e87
+		u8 bytes[16];
Borislav Petkov 758e87
+		u8 op1, op2;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+		ret = insn_decode_kernel(&insn, addr);
Borislav Petkov 758e87
+		if (WARN_ON_ONCE(ret < 0))
Borislav Petkov 758e87
+			continue;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+		op1 = insn.opcode.bytes[0];
Borislav Petkov 758e87
+		op2 = insn.opcode.bytes[1];
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+		switch (op1) {
Borislav Petkov 758e87
+		case CALL_INSN_OPCODE:
Borislav Petkov 758e87
+		case JMP32_INSN_OPCODE:
Borislav Petkov 758e87
+			break;
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+		default:
Borislav Petkov 758e87
+			WARN_ON_ONCE(1);
Borislav Petkov 758e87
+			continue;
Borislav Petkov 758e87
+		}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+		len = patch_retpoline(addr, &insn, bytes);
Borislav Petkov 758e87
+		if (len == insn.length) {
Borislav Petkov 758e87
+			optimize_nops(bytes, len);
Borislav Petkov 758e87
+			text_poke_early(addr, bytes, len);
Borislav Petkov 758e87
+		}
Borislav Petkov 758e87
+	}
Borislav Petkov 758e87
+}
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+#else /* !CONFIG_RETPOLINE || !CONFIG_STACK_VALIDATION */
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
Borislav Petkov 758e87
+
Borislav Petkov 758e87
+#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
Borislav Petkov 758e87
+
Borislav Petkov 758e87
 #ifdef CONFIG_SMP
Borislav Petkov 758e87
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
Borislav Petkov 758e87
 				  u8 *text, u8 *text_end)
Borislav Petkov 758e87
@@ -642,6 +769,12 @@ void __init alternative_instructions(void)
Borislav Petkov 758e87
 	 */
Borislav Petkov 758e87
 	apply_paravirt(__parainstructions, __parainstructions_end);
Borislav Petkov 758e87
 
Borislav Petkov 758e87
+	/*
Borislav Petkov 758e87
+	 * Rewrite the retpolines, must be done before alternatives since
Borislav Petkov 758e87
+	 * those can rewrite the retpoline thunks.
Borislav Petkov 758e87
+	 */
Borislav Petkov 758e87
+	apply_retpolines(__retpoline_sites, __retpoline_sites_end);
Borislav Petkov 758e87
+
Borislav Petkov 758e87
 	/*
Borislav Petkov 758e87
 	 * Then patch alternatives, such that those paravirt calls that are in
Borislav Petkov 758e87
 	 * alternatives can be overwritten by their immediate fragments.
Borislav Petkov 758e87
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
Borislav Petkov 758e87
index 5e9a34b5bd74..169fb6f4cd2e 100644
Borislav Petkov 758e87
--- a/arch/x86/kernel/module.c
Borislav Petkov 758e87
+++ b/arch/x86/kernel/module.c
Borislav Petkov 758e87
@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
Borislav Petkov 758e87
 		    struct module *me)
Borislav Petkov 758e87
 {
Borislav Petkov 758e87
 	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
Borislav Petkov 758e87
-		*para = NULL, *orc = NULL, *orc_ip = NULL;
Borislav Petkov 758e87
+		*para = NULL, *orc = NULL, *orc_ip = NULL,
Borislav Petkov 758e87
+		*retpolines = NULL;
Borislav Petkov 758e87
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
Borislav Petkov 758e87
 
Borislav Petkov 758e87
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
Borislav Petkov 758e87
@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
Borislav Petkov 758e87
 			orc = s;
Borislav Petkov 758e87
 		if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
Borislav Petkov 758e87
 			orc_ip = s;
Borislav Petkov 758e87
+		if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
Borislav Petkov 758e87
+			retpolines = s;
Borislav Petkov 758e87
 	}
Borislav Petkov 758e87
 
Borislav Petkov 758e87
+	if (retpolines) {
Borislav Petkov 758e87
+		void *rseg = (void *)retpolines->sh_addr;
Borislav Petkov 758e87
+		apply_retpolines(rseg, rseg + retpolines->sh_size);
Borislav Petkov 758e87
+	}
Borislav Petkov 758e87
 	if (alt) {
Borislav Petkov 758e87
 		/* patch .altinstructions */
Borislav Petkov 758e87
 		void *aseg = (void *)alt->sh_addr;
Borislav Petkov 758e87