Blob Blame History Raw
From 15472423ce47d6397d08d48daaae8590c9f9f242 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 22 Feb 2018 15:27:28 +0100
Subject: [PATCH] powerpc/mm/slice: Allow up to 64 low slices

References: FATE#326523
Patch-mainline: v4.17-rc1
Git-commit: 15472423ce47d6397d08d48daaae8590c9f9f242

While the implementation of the "slices" address space allows
a significant amount of high slices, it limits the number of
low slices to 16 due to the use of a single u64 low_slices_psize
element in struct mm_context_t

On the 8xx, the minimum slice size is the size of the area
covered by a single PMD entry, ie 4M in 4K pages mode and 64M in
16K pages mode. This means we could have at least 64 slices.

In order to override this limitation, this patch switches the
handling of low_slices_psize to char array as done already for
high_slices_psize.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
 arch/powerpc/include/asm/book3s/64/mmu.h |  3 +-
 arch/powerpc/include/asm/mmu-8xx.h       |  7 +++-
 arch/powerpc/include/asm/paca.h          |  2 +-
 arch/powerpc/kernel/paca.c               |  3 +-
 arch/powerpc/mm/hash_utils_64.c          | 13 ++++----
 arch/powerpc/mm/slb_low.S                |  8 +++--
 arch/powerpc/mm/slice.c                  | 57 +++++++++++++++++---------------
 7 files changed, 52 insertions(+), 41 deletions(-)

--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -93,7 +93,8 @@ typedef struct {
 	struct npu_context *npu_context;
 
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 low_slices_psize;	/* SLB page size encodings */
+	 /* SLB page size encodings*/
+	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long slb_addr_limit;
 #else
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -163,6 +163,11 @@
  */
 #define SPRN_M_TW	799
 
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
 #ifndef __ASSEMBLY__
 typedef struct {
 	unsigned int id;
@@ -170,7 +175,7 @@ typedef struct {
 	unsigned long vdso_base;
 #ifdef CONFIG_PPC_MM_SLICES
 	u16 user_psize;		/* page size index */
-	u64 low_slices_psize;	/* page size encodings */
+	unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned char high_slices_psize[0];
 	unsigned long slb_addr_limit;
 #endif
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -141,7 +141,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_BOOK3S
 	mm_context_id_t mm_ctx_id;
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 mm_ctx_low_slices_psize;
+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long mm_ctx_slb_addr_limit;
 #else
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -256,7 +256,8 @@ void copy_mm_to_paca(struct mm_struct *m
 #ifdef CONFIG_PPC_MM_SLICES
 	VM_BUG_ON(!mm->context.slb_addr_limit);
 	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
-	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+	memcpy(&get_paca()->mm_ctx_low_slices_psize,
+	       &context->low_slices_psize, sizeof(context->low_slices_psize));
 	memcpy(&get_paca()->mm_ctx_high_slices_psize,
 	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
 #else /* CONFIG_PPC_MM_SLICES */
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1109,19 +1109,18 @@ unsigned int hash_page_do_lazy_icache(un
 #ifdef CONFIG_PPC_MM_SLICES
 static unsigned int get_paca_psize(unsigned long addr)
 {
-	u64 lpsizes;
-	unsigned char *hpsizes;
+	unsigned char *psizes;
 	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		lpsizes = get_paca()->mm_ctx_low_slices_psize;
+		psizes = get_paca()->mm_ctx_low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-		return (lpsizes >> (index * 4)) & 0xF;
+	} else {
+		psizes = get_paca()->mm_ctx_high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
 	}
-	hpsizes = get_paca()->mm_ctx_high_slices_psize;
-	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
-	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+	return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
 #else
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -200,10 +200,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEG
 5:
 	/*
 	 * Handle lpsizes
-	 * r9 is get_paca()->context.low_slices_psize, r11 is index
+	 * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
 	 */
-	ld	r9,PACALOWSLICESPSIZE(r13)
-	mr	r11,r10
+	srdi    r11,r10,1 /* index */
+	addi	r9,r11,PACALOWSLICESPSIZE
+	lbzx	r9,r13,r9		/* r9 is lpsizes[r11] */
+	rldicl	r11,r10,0,63		/* r11 = r10 & 0x1 */
 6:
 	sldi	r11,r11,2  /* index * 4 */
 	/* Extract the psize and multiply to get an array offset */
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -150,19 +150,21 @@ static void slice_mask_for_free(struct m
 static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
 				unsigned long high_limit)
 {
-	unsigned char *hpsizes;
+	unsigned char *hpsizes, *lpsizes;
 	int index, mask_index;
 	unsigned long i;
-	u64 lpsizes;
 
 	ret->low_slices = 0;
 	if (SLICE_NUM_HIGH)
 		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((lpsizes >> (i * 4)) & 0xf) == psize)
+	for (i = 0; i < SLICE_NUM_LOW; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
 			ret->low_slices |= 1u << i;
+	}
 
 	if (high_limit <= SLICE_LOW_TOP)
 		return;
@@ -218,8 +220,7 @@ static void slice_convert(struct mm_stru
 {
 	int index, mask_index;
 	/* Write the new slice psize bits */
-	unsigned char *hpsizes;
-	u64 lpsizes;
+	unsigned char *hpsizes, *lpsizes;
 	unsigned long i, flags;
 
 	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -232,12 +233,13 @@ static void slice_convert(struct mm_stru
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (mask.low_slices & (1u << i))
-			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-
-	/* Assign the value back */
-	mm->context.low_slices_psize = lpsizes;
+		if (mask.low_slices & (1u << i)) {
+			mask_index = i & 0x1;
+			index = i >> 1;
+			lpsizes[index] = (lpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+		}
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
@@ -644,7 +646,7 @@ unsigned long arch_get_unmapped_area_top
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned char *hpsizes;
+	unsigned char *psizes;
 	int index, mask_index;
 
 	/*
@@ -658,15 +660,14 @@ unsigned int get_slice_psize(struct mm_s
 #endif
 	}
 	if (addr < SLICE_LOW_TOP) {
-		u64 lpsizes;
-		lpsizes = mm->context.low_slices_psize;
+		psizes = mm->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-		return (lpsizes >> (index * 4)) & 0xf;
+	} else {
+		psizes = mm->context.high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
 	}
-	hpsizes = mm->context.high_slices_psize;
-	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
-	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
@@ -687,8 +688,8 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
 void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 {
 	int index, mask_index;
-	unsigned char *hpsizes;
-	unsigned long flags, lpsizes;
+	unsigned char *hpsizes, *lpsizes;
+	unsigned long flags;
 	unsigned int old_psize;
 	int i;
 
@@ -706,12 +707,14 @@ void slice_set_user_psize(struct mm_stru
 	wmb();
 
 	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
-			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-	/* Assign the value back */
-	mm->context.low_slices_psize = lpsizes;
+	for (i = 0; i < SLICE_NUM_LOW; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((lpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+			lpsizes[index] = (lpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < SLICE_NUM_HIGH; i++) {