Blob Blame History Raw
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Subject: s390/mm: add no-dat TLB flush optimization
Patch-mainline: v4.14-rc1
Git-commit: 118bd31bea2cdb7f1dbf22dd9a58e818b5313156
References: FATE#321512, bsc#1061009

Summary:     kernel: guest TLB purge enhancements
Description: Add support for the KVM guest TLB purge enhancements
             introduced with IBM z14.

Upstream-Description:

             s390/mm: add no-dat TLB flush optimization

             Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>


Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 arch/s390/include/asm/pgtable.h  |   41 ++++++++++----
 arch/s390/include/asm/setup.h    |    6 +-
 arch/s390/include/asm/tlbflush.h |    5 +
 arch/s390/mm/pageattr.c          |    2 
 arch/s390/mm/pgtable.c           |  111 ++++++++++++++++++++++++++++++---------
 drivers/s390/char/sclp_early.c   |    6 +-
 6 files changed, 129 insertions(+), 42 deletions(-)

--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -899,15 +899,27 @@ static inline pte_t pte_mkhuge(pte_t pte
 #define IPTE_GLOBAL	0
 #define	IPTE_LOCAL	1
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
+#define IPTE_NODAT	0x400
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
+			       unsigned long opt, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + TLB flush for the pte */
+	if (__builtin_constant_p(opt) && opt == 0) {
+		/* Invalidation + TLB flush for the pte */
+		asm volatile(
+			"	.insn	rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+			: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+			  [m4] "i" (local));
+		return;
+	}
+
+	/* Invalidate ptes with options + TLB flush of the ptes */
 	asm volatile(
-		"       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
-		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
-		  [m4] "i" (local));
+		"	.insn	rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+		: [r2] "+a" (address), [r3] "+a" (opt)
+		: [r1] "a" (pto), [m4] "i" (local) : "memory");
 }
 
 static inline void __ptep_ipte_range(unsigned long address, int nr,
@@ -1274,31 +1286,36 @@ static inline void __pmdp_csp(pmd_t *pmd
 #define IDTE_GLOBAL	0
 #define IDTE_LOCAL	1
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
+#define IDTE_PTOA	0x0800
+#define IDTE_NODAT	0x1000
+
+static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
+			       unsigned long opt, int local)
 {
 	unsigned long sto;
 
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+	sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
 	asm volatile(
 		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
 		: "+m" (*pmdp)
-		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
+		: [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
 		  [m4] "i" (local)
 		: "cc" );
 }
 
-static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
+static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
+			       unsigned long opt, int local)
 {
 	unsigned long r3o;
 
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
+	r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
 	asm volatile(
 		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
 		: "+m" (*pudp)
-		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
+		: [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
 		  [m4] "i" (local)
-		: "cc");
+		: "cc" );
 }
 
 pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -29,8 +29,9 @@
 #define MACHINE_FLAG_TE		_BITUL(11)
 #define MACHINE_FLAG_TLB_LC	_BITUL(12)
 #define MACHINE_FLAG_VX		_BITUL(13)
-#define MACHINE_FLAG_NX		_BITUL(14)
-#define MACHINE_FLAG_GS		_BITUL(15)
+#define MACHINE_FLAG_TLB_GUEST	_BITUL(14)
+#define MACHINE_FLAG_NX		_BITUL(15)
+#define MACHINE_FLAG_GS		_BITUL(16)
 
 #define LPP_MAGIC		_BITUL(31)
 #define LPP_PFAULT_PID_MASK	_AC(0xffffffff, UL)
@@ -68,6 +69,7 @@ extern void detect_memory_memblock(void)
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
 #define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_TLB_GUEST	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
 #define MACHINE_HAS_NX		(S390_lowcore.machine_flags & MACHINE_FLAG_NX)
 #define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
 
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -20,10 +20,13 @@ static inline void __tlb_flush_local(voi
  */
 static inline void __tlb_flush_idte(unsigned long asce)
 {
+	unsigned long opt;
+
+	opt = IDTE_PTOA;
 	/* Global TLB flush for the mm */
 	asm volatile(
 		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
-		: : "a" (2048), "a" (asce) : "cc");
+		: : "a" (opt), "a" (asce) : "cc");
 }
 
 #ifdef CONFIG_SMP
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -308,7 +308,7 @@ static void ipte_range(pte_t *pte, unsig
 		return;
 	}
 	for (i = 0; i < nr; i++) {
-		__ptep_ipte(address, pte, IPTE_GLOBAL);
+		__ptep_ipte(address, pte, 0, IPTE_GLOBAL);
 		address += PAGE_SIZE;
 		pte++;
 	}
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -25,6 +25,38 @@
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 
+static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
+				   pte_t *ptep)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL)
+			opt |= IPTE_NODAT;
+		__ptep_ipte(addr, ptep, opt, IPTE_LOCAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, IPTE_LOCAL);
+	}
+}
+
+static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep)
+{
+	unsigned long opt, asce;
+
+	if (MACHINE_HAS_TLB_GUEST) {
+		opt = 0;
+		asce = READ_ONCE(mm->context.gmap_asce);
+		if (asce == 0UL)
+			opt |= IPTE_NODAT;
+		__ptep_ipte(addr, ptep, opt, IPTE_GLOBAL);
+	} else {
+		__ptep_ipte(addr, ptep, 0, IPTE_GLOBAL);
+	}
+}
+
 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
 {
@@ -36,9 +68,9 @@ static inline pte_t ptep_flush_direct(st
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte(addr, ptep, IPTE_LOCAL);
+		ptep_ipte_local(mm, addr, ptep);
 	else
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		ptep_ipte_global(mm, addr, ptep);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -57,7 +89,7 @@ static inline pte_t ptep_flush_lazy(stru
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		ptep_ipte_global(mm, addr, ptep);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -290,6 +322,26 @@ void ptep_modify_prot_commit(struct mm_s
 }
 EXPORT_SYMBOL(ptep_modify_prot_commit);
 
+static inline void pmdp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_LOCAL);
+	else
+		__pmdp_idte(addr, pmdp, 0, IDTE_LOCAL);
+}
+
+static inline void pmdp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pmd_t *pmdp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pmdp_idte(addr, pmdp, 0, IDTE_GLOBAL);
+	else
+		__pmdp_csp(pmdp);
+}
+
 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 				      unsigned long addr, pmd_t *pmdp)
 {
@@ -298,16 +350,12 @@ static inline pmd_t pmdp_flush_direct(st
 	old = *pmdp;
 	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
 		return old;
-	if (!MACHINE_HAS_IDTE) {
-		__pmdp_csp(pmdp);
-		return old;
-	}
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte(addr, pmdp, IDTE_LOCAL);
+		pmdp_idte_local(mm, addr, pmdp);
 	else
-		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
+		pmdp_idte_global(mm, addr, pmdp);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -325,10 +373,9 @@ static inline pmd_t pmdp_flush_lazy(stru
 			  cpumask_of(smp_processor_id()))) {
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
-	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
-	else
-		__pmdp_csp(pmdp);
+	} else {
+		pmdp_idte_global(mm, addr, pmdp);
+	}
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -359,28 +406,44 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *m
 }
 EXPORT_SYMBOL(pmdp_xchg_lazy);
 
-static inline pud_t pudp_flush_direct(struct mm_struct *mm,
-				      unsigned long addr, pud_t *pudp)
+static inline void pudp_idte_local(struct mm_struct *mm,
+				   unsigned long addr, pud_t *pudp)
 {
-	pud_t old;
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_LOCAL);
+	else
+		__pudp_idte(addr, pudp, 0, IDTE_LOCAL);
+}
 
-	old = *pudp;
-	if (pud_val(old) & _REGION_ENTRY_INVALID)
-		return old;
-	if (!MACHINE_HAS_IDTE) {
+static inline void pudp_idte_global(struct mm_struct *mm,
+				    unsigned long addr, pud_t *pudp)
+{
+	if (MACHINE_HAS_TLB_GUEST)
+		__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_GLOBAL);
+	else if (MACHINE_HAS_IDTE)
+		__pudp_idte(addr, pudp, 0, IDTE_GLOBAL);
+	else
 		/*
 		 * Invalid bit position is the same for pmd and pud, so we can
 		 * re-use _pmd_csp() here
 		 */
 		__pmdp_csp((pmd_t *) pudp);
+}
+
+static inline pud_t pudp_flush_direct(struct mm_struct *mm,
+				      unsigned long addr, pud_t *pudp)
+{
+	pud_t old;
+
+	old = *pudp;
+	if (pud_val(old) & _REGION_ENTRY_INVALID)
 		return old;
-	}
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pudp_idte(addr, pudp, IDTE_LOCAL);
+		pudp_idte_local(mm, addr, pudp);
 	else
-		__pudp_idte(addr, pudp, IDTE_GLOBAL);
+		pudp_idte_global(mm, addr, pudp);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -641,7 +704,7 @@ bool test_and_clear_guest_dirty(struct m
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
+		ptep_ipte_global(mm, addr, ptep);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
 		else
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -39,7 +39,7 @@ struct read_info_sccb {
 	u8	fac84;			/* 84 */
 	u8	fac85;			/* 85 */
 	u8	_pad_86[91 - 86];	/* 86-90 */
-	u8	flags;			/* 91 */
+	u8	fac91;			/* 91 */
 	u8	_pad_92[98 - 92];	/* 92-97 */
 	u8	fac98;			/* 98 */
 	u8	hamaxpow;		/* 99 */
@@ -103,6 +103,8 @@ static void __init sclp_early_facilities
 	sclp.has_kss = !!(sccb->fac98 & 0x01);
 	if (sccb->fac85 & 0x02)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
+	if (sccb->fac91 & 0x40)
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
 	sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
 	sclp.rzm <<= 20;
@@ -139,7 +141,7 @@ static void __init sclp_early_facilities
 
 	/* Save IPL information */
 	sclp_ipl_info.is_valid = 1;
-	if (sccb->flags & 0x2)
+	if (sccb->fac91 & 0x2)
 		sclp_ipl_info.has_dump = 1;
 	memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);