Blob Blame History Raw
From 1a367063ca0c1c6f6f54b5abd7b4836b0866a07b Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 2 Aug 2017 14:03:22 -0400
Subject: [PATCH] powerpc/pseries: Check memory device state before
 onlining/offlining

References: FATE#322022, bsc#1065729
Patch-mainline: v4.14-rc1
Git-commit: 1a367063ca0c1c6f6f54b5abd7b4836b0866a07b

When DLPAR adding or removing memory we need to check the device
offline status before trying to online/offline the memory. This is
needed because calls to device_online() and device_offline() will
return non-zero for memory that is already online and offline
respectively.

This update resolves two scenarios. First, for a kernel built with
auto-online memory enabled (CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y),
memory will be onlined as part of calls to add_memory(). After adding
the memory the pseries DLPAR code tries to online it and fails since
the memory is already online. The DLPAR code then tries to remove the
memory which produces the oops message below because the memory is not
offline.

The second scenario occurs when removing memory that is already
offline, i.e. marking memory offline (via sysfs) and then trying to
remove that memory. This doesn't work because offlining the already
offline memory does not succeed and the DLPAR code then fails the
DLPAR remove operation.

The fix for both scenarios is to check the device.offline status
before making the calls to device_online() or device_offline().

  kernel BUG at mm/memory_hotplug.c:1936!
  ...
  NIP [c0000000002ca428] .remove_memory+0xb8/0xc0
  LR [c0000000002ca3cc] .remove_memory+0x5c/0xc0
  Call Trace:
    .remove_memory+0x5c/0xc0 (unreliable)
    .dlpar_add_lmb+0x384/0x400
    .dlpar_memory+0x5dc/0xca0
    .handle_dlpar_errorlog+0x74/0xe0
    .pseries_hp_work_fn+0x2c/0x90
    .process_one_work+0x17c/0x460
    .worker_thread+0x88/0x500
    .kthread+0x15c/0x1a0
    .ret_from_kernel_thread+0x58/0xc0

Fixes: 943db62c316c ("powerpc/pseries: Revert 'Auto-online hotplugged memory'")
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
[mpe: Use bool, add explicit rc=0 case, change log typos & formatting]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Michal Suchanek <msuchanek@suse.de>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 53 +++++++++++++++----------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ca9b2f4aaa22..9e3afd238d34 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -336,7 +336,38 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
 	return mem_block;
 }
 
+static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online)
+{
+	struct memory_block *mem_block;
+	int rc;
+
+	mem_block = lmb_to_memblock(lmb);
+	if (!mem_block)
+		return -EINVAL;
+
+	if (online && mem_block->dev.offline)
+		rc = device_online(&mem_block->dev);
+	else if (!online && !mem_block->dev.offline)
+		rc = device_offline(&mem_block->dev);
+	else
+		rc = 0;
+
+	put_device(&mem_block->dev);
+
+	return rc;
+}
+
+static int dlpar_online_lmb(struct of_drconf_cell *lmb)
+{
+	return dlpar_change_lmb_state(lmb, true);
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
+static int dlpar_offline_lmb(struct of_drconf_cell *lmb)
+{
+	return dlpar_change_lmb_state(lmb, false);
+}
+
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long block_sz, start_pfn;
@@ -431,19 +462,13 @@ static int dlpar_add_lmb(struct of_drconf_cell *);
 
 static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 {
-	struct memory_block *mem_block;
 	unsigned long block_sz;
 	int nid, rc;
 
 	if (!lmb_is_removable(lmb))
 		return -EINVAL;
 
-	mem_block = lmb_to_memblock(lmb);
-	if (!mem_block)
-		return -EINVAL;
-
-	rc = device_offline(&mem_block->dev);
-	put_device(&mem_block->dev);
+	rc = dlpar_offline_lmb(lmb);
 	if (rc)
 		return rc;
 
@@ -737,20 +762,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index,
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static int dlpar_online_lmb(struct of_drconf_cell *lmb)
-{
-	struct memory_block *mem_block;
-	int rc;
-
-	mem_block = lmb_to_memblock(lmb);
-	if (!mem_block)
-		return -EINVAL;
-
-	rc = device_online(&mem_block->dev);
-	put_device(&mem_block->dev);
-	return rc;
-}
-
 static int dlpar_add_lmb(struct of_drconf_cell *lmb)
 {
 	unsigned long block_sz;
-- 
2.10.2