Blob Blame History Raw
From 73a8594bdc5d88bdb125e458a4147669b8ff1cd1 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 27 Apr 2018 09:47:37 -0700
Subject: [PATCH 3/8] x86, l1tf: Protect PROT_NONE PTEs against speculation
Patch-mainline: v4.19-rc1
Git-commit: 6b28baca9b1f0d4a42b865da7a05b1c81424bd5c
References: bnc#1087081, CVE-2018-3620

We also need to protect PTEs that are set to PROT_NONE against
L1TF speculation attacks.

This is important inside guests, because L1TF speculation
bypasses physical page remapping. While the VM has its own
migitations preventing leaking data from other VMs into
the guest, this would still risk leaking the wrong page
inside the current guest.

This uses the same technique as Linus' swap entry patch:
while an entry is is in PROTNONE state we invert the
complete PFN part part of it. This ensures that the
the highest bit will point to non existing memory.

The invert is done by pte/pmd_modify and pfn/pmd/pud_pte for
PROTNONE and pte/pmd/pud_pfn undo it.

We assume that noone tries to touch the PFN part of
a PTE without using these primitives.

This doesn't handle the case that MMIO is on the top
of the CPU physical memory. If such an MMIO region
was exposed by an unpriviledged driver for mmap
it would be possible to attack some real memory.
However this situation is all rather unlikely.

For 32bit non PAE we don't try inversion because
there are really not enough bits to protect anything.

Q: Why does the guest need to be protected when the
HyperVisor already has L1TF mitigations?
A: Here's an example:
You have physical pages 1 2. They get mapped into a guest as
GPA 1 -> PA 2
GPA 2 -> PA 1
through EPT.

The L1TF speculation ignores the EPT remapping.

Now the guest kernel maps GPA 1 to process A and GPA 2 to process B,
and they belong to different users and should be isolated.

A sets the GPA 1 PA 2 PTE to PROT_NONE to bypass the EPT remapping
and gets read access to the underlying physical page. Which
in this case points to PA 2, so it can read process B's data,
if it happened to be in L1.

So we broke isolation inside the guest.

There's nothing the hypervisor can do about this. This
mitigation has to be done in the guest.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-By: Dave Hansen <dave.hansen@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>

---
v2: Use new helper to generate XOR mask to invert (Linus)
v3: Use inline helper for protnone mask checking
v4: Use inline helpers to check for PROT_NONE changes
---
 arch/x86/include/asm/pgtable-2level.h |   17 +++++++++++++
 arch/x86/include/asm/pgtable-3level.h |    2 +
 arch/x86/include/asm/pgtable-invert.h |   32 ++++++++++++++++++++++++
 arch/x86/include/asm/pgtable.h        |   44 +++++++++++++++++++++++-----------
 arch/x86/include/asm/pgtable_64.h     |    2 +
 5 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 arch/x86/include/asm/pgtable-invert.h

--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -94,4 +94,21 @@ static inline unsigned long pte_bitop(un
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { (pte).pte_low })
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 
+/* No inverted PFNs on 2 level page tables */
+
+static inline u64 protnone_mask(u64 val)
+{
+	return 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	return val;
+}
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return false;
+}
+
 #endif /* _ASM_X86_PGTABLE_2LEVEL_H */
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -212,4 +212,6 @@ static inline pud_t native_pudp_get_and_
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ { .pte_high = (x).val } })
 
+#include <asm/pgtable-invert.h>
+
 #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
--- /dev/null
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PGTABLE_INVERT_H
+#define _ASM_PGTABLE_INVERT_H 1
+
+#ifndef __ASSEMBLY__
+
+static inline bool __pte_needs_invert(u64 val)
+{
+	return (val & (_PAGE_PRESENT|_PAGE_PROTNONE)) == _PAGE_PROTNONE;
+}
+
+/* Get a mask to xor with the page table entry to get the correct pfn. */
+static inline u64 protnone_mask(u64 val)
+{
+	return __pte_needs_invert(val) ?  ~0ull : 0;
+}
+
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
+{
+	/*
+	 * When a PTE transitions from NONE to !NONE or vice-versa
+	 * invert the PFN part to stop speculation.
+	 * pte_pfn undoes this when needed.
+	 */
+	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
+		val = (val & ~mask) | (~val & mask);
+	return val;
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -188,19 +188,29 @@ static inline int pte_special(pte_t pte)
 	return pte_flags(pte) & _PAGE_SPECIAL;
 }
 
+/* Entries that were set to PROT_NONE are inverted */
+
+static inline u64 protnone_mask(u64 val);
+
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+	unsigned long pfn = pte_val(pte);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-	return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
+	unsigned long pfn = pmd_val(pmd);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-	return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
+	unsigned long pfn = pud_val(pud);
+	pfn ^= protnone_mask(pfn);
+	return (pfn & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long p4d_pfn(p4d_t p4d)
@@ -526,25 +536,33 @@ static inline pgprotval_t massage_pgprot
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PTE_PFN_MASK;
+	return __pte(pfn | massage_pgprot(pgprot));
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PMD_PAGE_MASK;
+	return __pmd(pfn | massage_pgprot(pgprot));
 }
 
 static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+	phys_addr_t pfn = page_nr << PAGE_SHIFT;
+	pfn ^= protnone_mask(pgprot_val(pgprot));
+	pfn &= PHYSICAL_PUD_PAGE_MASK;
+	return __pud(pfn | massage_pgprot(pgprot));
 }
 
+static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pteval_t val = pte_val(pte);
+	pteval_t val = pte_val(pte), oldval = val;
 
 	/*
 	 * Chop off the NX bit (if present), and add the NX portion of
@@ -552,17 +570,17 @@ static inline pte_t pte_modify(pte_t pte
 	 */
 	val &= _PAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
 	return __pte(val);
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmdval_t val = pmd_val(pmd);
+	pmdval_t val = pmd_val(pmd), oldval = val;
 
 	val &= _HPAGE_CHG_MASK;
 	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
-
+	val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
 	return __pmd(val);
 }
 
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -335,6 +335,8 @@ extern void cleanup_highmap(void);
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
+#include <asm/pgtable-invert.h>
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PGTABLE_64_H */