From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 29 Mar 2022 12:56:24 -0400
Subject: KVM: x86/mmu: do compare-and-exchange of gPTE via the user address
Git-commit: 2a8859f373b0a86f0ece8ec8312607eacf12485d
Patch-mainline: v5.18-rc1
References: CVE-2022-1158 bsc#1197660

FNAME(cmpxchg_gpte) is an inefficient mess.  It is at least decent if it
can go through get_user_pages_fast(), but if it cannot then it tries to
use memremap(); that is not just terribly slow, it is also wrong because
it assumes that the VM_PFNMAP VMA is contiguous.

The right way to do it would be to do the same thing as
hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to
fix up page faults before giving up", 2016-07-05), using follow_pte()
and fixup_user_fault() to determine the correct address to use for
memremap().  To do this, one could for example extract hva_to_pfn()
for use outside virt/kvm/kvm_main.c.  But really there is no reason to
do that either, because there is already a perfectly valid address to
do the cmpxchg() on, only it is a userspace address.  That means doing
user_access_begin()/user_access_end() and writing the code in assembly
to handle exceptions correctly.  Worse, the guest PTE can be 8-byte
even on i686 so there is the extra complication of using cmpxchg8b to
account for.  But at least it is an efficient mess.

(Thanks to Linus for suggesting improvement on the inline assembly).

Reported-by: Qiuhao Li <qiuhao@sysec.org>
Reported-by: Gaoning Pan <pgn@zju.edu.cn>
Reported-by: Yongkang Jia <kangel@zju.edu.cn>
Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com
Debugged-by: Tadeusz Struk <tadeusz.struk@linaro.org>
Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Cc: stable@vger.kernel.org
Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/kvm/paging_tmpl.h |   78 +++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 40 deletions(-)

--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -34,9 +34,8 @@
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
 	#ifdef CONFIG_X86_64
 	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
-	#define CMPXCHG cmpxchg
+	#define CMPXCHG "cmpxchgq"
 	#else
-	#define CMPXCHG cmpxchg64
 	#define PT_MAX_FULL_LEVELS 2
 	#endif
 #elif PTTYPE == 32
@@ -52,7 +51,7 @@
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
-	#define CMPXCHG cmpxchg
+	#define CMPXCHG "cmpxchgl"
 #elif PTTYPE == PTTYPE_EPT
 	#define pt_element_t u64
 	#define guest_walker guest_walkerEPT
@@ -65,7 +64,9 @@
 	#define PT_GUEST_DIRTY_SHIFT 9
 	#define PT_GUEST_ACCESSED_SHIFT 8
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
-	#define CMPXCHG cmpxchg64
+	#ifdef CONFIG_X86_64
+	#define CMPXCHG "cmpxchgq"
+	#endif
 	#define PT_MAX_FULL_LEVELS 4
 #else
 	#error Invalid PTTYPE value
@@ -132,43 +133,40 @@ static int FNAME(cmpxchg_gpte)(struct kv
 			       pt_element_t __user *ptep_user, unsigned index,
 			       pt_element_t orig_pte, pt_element_t new_pte)
 {
-	int npages;
-	pt_element_t ret;
-	pt_element_t *table;
-	struct page *page;
-
-	npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
-	if (likely(npages == 1)) {
-		table = kmap_atomic(page);
-		ret = CMPXCHG(&table[index], orig_pte, new_pte);
-		kunmap_atomic(table);
-
-		kvm_release_page_dirty(page);
-	} else {
-		struct vm_area_struct *vma;
-		unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
-		unsigned long pfn;
-		unsigned long paddr;
-
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
-		if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
-			up_read(&current->mm->mmap_sem);
-			return -EFAULT;
-		}
-		pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-		paddr = pfn << PAGE_SHIFT;
-		table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
-		if (!table) {
-			up_read(&current->mm->mmap_sem);
-			return -EFAULT;
-		}
-		ret = CMPXCHG(&table[index], orig_pte, new_pte);
-		memunmap(table);
-		up_read(&current->mm->mmap_sem);
-	}
 
-	return (ret != orig_pte);
+	int r = -EFAULT;
+
+	if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
+		return -EFAULT;
+
+#ifdef CMPXCHG
+	asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
+			"mov $0, %[r]\n"
+			"setnz %b[r]\n"
+			"2:"
+			_ASM_EXTABLE_UA(1b, 2b)
+			: [ptr] "+m" (*ptep_user),
+			[old] "+a" (orig_pte),
+			[r] "+q" (r)
+			: [new] "r" (new_pte)
+			: "memory");
+#else
+	asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
+			"movl $0, %[r]\n"
+			"jz 2f\n"
+			"incl %[r]\n"
+			"2:"
+			_ASM_EXTABLE_UA(1b, 2b)
+			: [ptr] "+m" (*ptep_user),
+			[old] "+A" (orig_pte),
+			[r] "+rm" (r)
+			: [new_lo] "b" ((u32)new_pte),
+			[new_hi] "c" ((u32)(new_pte >> 32))
+			: "memory");
+#endif
+
+	user_access_end();
+	return r;
 }
 
 static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,