|
Michal Suchanek |
28f610 |
From aeca35b9a52b0e0d019a5244fbaab699f753b443 Mon Sep 17 00:00:00 2001
|
|
Michal Suchanek |
28f610 |
From: Nathan Lynch <nathanl@linux.ibm.com>
|
|
Michal Suchanek |
28f610 |
Date: Mon, 7 Dec 2020 15:51:46 -0600
|
|
Michal Suchanek |
28f610 |
Subject: [PATCH] powerpc/pseries/mobility: retry partition suspend after error
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
References: bsc#1181674 ltc#189159
|
|
Michal Suchanek |
28f610 |
Patch-mainline: v5.11-rc1
|
|
Michal Suchanek |
28f610 |
Git-commit: aeca35b9a52b0e0d019a5244fbaab699f753b443
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
This is a mitigation for the relatively rare occurrence where a
|
|
Michal Suchanek |
28f610 |
virtual IOA can be in a transient state that prevents the
|
|
Michal Suchanek |
28f610 |
suspend/migration from succeeding, resulting in an error from
|
|
Michal Suchanek |
28f610 |
ibm,suspend-me.
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
If the join/suspend sequence returns an error, it is acceptable to
|
|
Michal Suchanek |
28f610 |
retry as long as the VASI suspend session state is still
|
|
Michal Suchanek |
28f610 |
"Suspending" (i.e. the platform is still waiting for the OS to
|
|
Michal Suchanek |
28f610 |
suspend).
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
Retry a few times on suspend failure while this condition holds,
|
|
Michal Suchanek |
28f610 |
progressively increasing the delay between attempts. We don't want to
|
|
Michal Suchanek |
28f610 |
retry indefinitely because firmware emits an error log event on each
|
|
Michal Suchanek |
28f610 |
unsuccessful attempt.
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
|
|
Michal Suchanek |
28f610 |
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
|
|
Michal Suchanek |
28f610 |
Link: https://lore.kernel.org/r/20201207215200.1785968-15-nathanl@linux.ibm.com
|
|
Michal Suchanek |
28f610 |
Acked-by: Michal Suchanek <msuchanek@suse.de>
|
|
Michal Suchanek |
28f610 |
---
|
|
Michal Suchanek |
28f610 |
arch/powerpc/platforms/pseries/mobility.c | 59 ++++++++++++++++++++++-
|
|
Michal Suchanek |
28f610 |
1 file changed, 57 insertions(+), 2 deletions(-)
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
|
|
Michal Suchanek |
28f610 |
index f234a7ed87aa..fe7e35cdc9d5 100644
|
|
Michal Suchanek |
28f610 |
--- a/arch/powerpc/platforms/pseries/mobility.c
|
|
Michal Suchanek |
28f610 |
+++ b/arch/powerpc/platforms/pseries/mobility.c
|
|
Michal Suchanek |
28f610 |
@@ -542,16 +542,71 @@ static void pseries_cancel_migration(u64 handle, int err)
|
|
Michal Suchanek |
28f610 |
pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
|
|
Michal Suchanek |
28f610 |
}
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
+/*
+ * Run do_join on all online CPUs via stop_machine(). On error, retry up to
+ * max_attempts times in total while the VASI state polls as H_VASI_SUSPENDING
+ * or the poll returns -EOPNOTSUPP. Returns 0 or the last stop_machine() error.
+ */
+static int pseries_suspend(u64 handle)
+{
+	const unsigned int max_attempts = 5;
+	unsigned int retry_interval_ms = 1;
+	unsigned int attempt = 1;
+	int ret;
+
+	while (true) {
+		atomic_t counter = ATOMIC_INIT(0);
+		unsigned long vasi_state;
+		int vasi_err;
+
+		ret = stop_machine(do_join, &counter, cpu_online_mask);
+		if (ret == 0)
+			break;
+		/*
+		 * If the VASI stream is still Suspending, the error is
+		 * likely a transient device condition that may clear after
+		 * a delay. Mitigation until drivers can prepare for suspend.
+		 */
+		pr_notice("Partition suspend attempt %u of %u error: %d\n",
+			  attempt, max_attempts, ret);
+
+		/* Firmware logs each failed attempt, so cap the retries. */
+		if (attempt == max_attempts)
+			break;
+
+		vasi_err = poll_vasi_state(handle, &vasi_state);
+		if (vasi_err == 0) {
+			if (vasi_state != H_VASI_SUSPENDING) {
+				pr_notice("VASI state %lu after failed suspend\n",
+					  vasi_state);
+				break;
+			}
+		} else if (vasi_err != -EOPNOTSUPP) {
+			pr_err("VASI state poll error: %d\n", vasi_err);
+			break;
+		}
+
+		pr_notice("Will retry partition suspend after %u ms\n",
+			  retry_interval_ms);
+
+		/* Back off progressively: 1, 10, 100, then 1000 ms. */
+		msleep(retry_interval_ms);
+		retry_interval_ms *= 10;
+		attempt++;
+	}
+
+	return ret;
+}
|
|
Michal Suchanek |
28f610 |
+
|
|
Michal Suchanek |
28f610 |
static int pseries_migrate_partition(u64 handle)
|
|
Michal Suchanek |
28f610 |
{
|
|
Michal Suchanek |
28f610 |
- atomic_t counter = ATOMIC_INIT(0);
|
|
Michal Suchanek |
28f610 |
int ret;
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
ret = wait_for_vasi_session_suspending(handle);
|
|
Michal Suchanek |
28f610 |
if (ret)
|
|
Michal Suchanek |
28f610 |
return ret;
|
|
Michal Suchanek |
28f610 |
|
|
Michal Suchanek |
28f610 |
- ret = stop_machine(do_join, &counter, cpu_online_mask);
|
|
Michal Suchanek |
28f610 |
+ ret = pseries_suspend(handle);
|
|
Michal Suchanek |
28f610 |
if (ret == 0)
|
|
Michal Suchanek |
28f610 |
post_mobility_fixup();
|
|
Michal Suchanek |
28f610 |
else
|
|
Michal Suchanek |
28f610 |
--
|
|
Michal Suchanek |
28f610 |
2.26.2
|
|
Michal Suchanek |
28f610 |
|