Hannes Reinecke 849c71
From: Keith Busch <kbusch@kernel.org>
Hannes Reinecke 849c71
Date: Wed, 4 Sep 2019 10:06:11 -0600
Hannes Reinecke 849c71
Subject: [PATCH] nvme: Wait for reset state when required
Hannes Reinecke 849c71
Git-commit: c1ac9a4b0797ca8bb4470f863a5f78ef1ab13bed
Hannes Reinecke 849c71
Patch-mainline: v5.4-rc4
Hannes Reinecke 849c71
References: bsc#1169045
Hannes Reinecke 849c71
Hannes Reinecke 849c71
Prevent simultaneous controller disabling/enabling tasks from interfering
Hannes Reinecke 849c71
with each other through a function to wait until the task successfully
Hannes Reinecke 849c71
transitioned the controller to the RESETTING state. This ensures disabling
Hannes Reinecke 849c71
the controller will not be interrupted by another reset path, otherwise
Hannes Reinecke 849c71
a concurrent reset may leave the controller in the wrong state.
Hannes Reinecke 849c71
Hannes Reinecke 849c71
Tested-by: Edmund Nadolski <edmund.nadolski@intel.com>
Hannes Reinecke 849c71
Reviewed-by: Christoph Hellwig <hch@lst.de>
Hannes Reinecke 849c71
Signed-off-by: Keith Busch <kbusch@kernel.org>
Hannes Reinecke 849c71
Acked-by: Hannes Reinecke <hare@suse.com>
Hannes Reinecke 849c71
---
Hannes Reinecke 849c71
 drivers/nvme/host/core.c | 41 +++++++++++++++++++++++++++++++++++++++--
Hannes Reinecke 849c71
 drivers/nvme/host/nvme.h |  4 ++++
Hannes Reinecke 849c71
 drivers/nvme/host/pci.c  | 46 +++++++++++++++++++++++++++++++---------------
Hannes Reinecke 849c71
 3 files changed, 74 insertions(+), 17 deletions(-)
Hannes Reinecke 849c71
Hannes Reinecke 849c71
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
Hannes Reinecke 849c71
index e94fa693dd4b..fa7ba09dca77 100644
Hannes Reinecke 849c71
--- a/drivers/nvme/host/core.c
Hannes Reinecke 849c71
+++ b/drivers/nvme/host/core.c
Hannes Reinecke 849c71
@@ -126,7 +126,7 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
  * code paths that can't be interrupted by other reset attempts. A hot removal
Hannes Reinecke 849c71
  * may prevent this from succeeding.
Hannes Reinecke 849c71
  */
Hannes Reinecke 849c71
-static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	if (ctrl->state != NVME_CTRL_RESETTING)
Hannes Reinecke 849c71
 		return -EBUSY;
Hannes Reinecke 849c71
@@ -134,6 +134,7 @@ static int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
 		return -EBUSY;
Hannes Reinecke 849c71
 	return 0;
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
@@ -384,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
Hannes Reinecke 849c71
 		break;
Hannes Reinecke 849c71
 	}
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
-	if (changed)
Hannes Reinecke 849c71
+	if (changed) {
Hannes Reinecke 849c71
 		ctrl->state = new_state;
Hannes Reinecke 849c71
+		wake_up_all(&ctrl->state_wq);
Hannes Reinecke 849c71
+	}
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 	spin_unlock_irqrestore(&ctrl->lock, flags);
Hannes Reinecke 849c71
 	if (changed && ctrl->state == NVME_CTRL_LIVE)
Hannes Reinecke 849c71
@@ -394,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
+/*
Hannes Reinecke 849c71
+ * Returns true for sink states that can't ever transition back to live.
Hannes Reinecke 849c71
+ */
Hannes Reinecke 849c71
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
+{
Hannes Reinecke 849c71
+	switch (ctrl->state) {
Hannes Reinecke 849c71
+	case NVME_CTRL_NEW:
Hannes Reinecke 849c71
+	case NVME_CTRL_LIVE:
Hannes Reinecke 849c71
+	case NVME_CTRL_RESETTING:
Hannes Reinecke 849c71
+	case NVME_CTRL_CONNECTING:
Hannes Reinecke 849c71
+		return false;
Hannes Reinecke 849c71
+	case NVME_CTRL_DELETING:
Hannes Reinecke 849c71
+	case NVME_CTRL_DEAD:
Hannes Reinecke 849c71
+		return true;
Hannes Reinecke 849c71
+	default:
Hannes Reinecke 849c71
+		WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
Hannes Reinecke 849c71
+		return true;
Hannes Reinecke 849c71
+	}
Hannes Reinecke 849c71
+}
Hannes Reinecke 849c71
+
Hannes Reinecke 849c71
+/*
Hannes Reinecke 849c71
+ * Waits for the controller state to be resetting, or returns false if it is
Hannes Reinecke 849c71
+ * not possible to ever transition to that state.
Hannes Reinecke 849c71
+ */
Hannes Reinecke 849c71
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
+{
Hannes Reinecke 849c71
+	wait_event(ctrl->state_wq,
Hannes Reinecke 849c71
+		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
Hannes Reinecke 849c71
+		   nvme_state_terminal(ctrl));
Hannes Reinecke 849c71
+	return ctrl->state == NVME_CTRL_RESETTING;
Hannes Reinecke 849c71
+}
Hannes Reinecke 849c71
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
Hannes Reinecke 849c71
+
Hannes Reinecke 849c71
 static void nvme_free_ns_head(struct kref *ref)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	struct nvme_ns_head *head =
Hannes Reinecke 849c71
@@ -3998,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
Hannes Reinecke 849c71
 	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
Hannes Reinecke 849c71
 	INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
Hannes Reinecke 849c71
 	INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
Hannes Reinecke 849c71
+	init_waitqueue_head(&ctrl->state_wq);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
Hannes Reinecke 849c71
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
Hannes Reinecke 849c71
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
Hannes Reinecke 849c71
index 2ba577271ada..22e8401352c2 100644
Hannes Reinecke 849c71
--- a/drivers/nvme/host/nvme.h
Hannes Reinecke 849c71
+++ b/drivers/nvme/host/nvme.h
Hannes Reinecke 849c71
@@ -15,6 +15,7 @@
Hannes Reinecke 849c71
 #include <linux/sed-opal.h>
Hannes Reinecke 849c71
 #include <linux/fault-inject.h>
Hannes Reinecke 849c71
 #include <linux/rcupdate.h>
Hannes Reinecke 849c71
+#include <linux/wait.h>
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 #include <trace/events/block.h>
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
@@ -198,6 +199,7 @@ struct nvme_ctrl {
Hannes Reinecke 849c71
 	struct cdev cdev;
Hannes Reinecke 849c71
 	struct work_struct reset_work;
Hannes Reinecke 849c71
 	struct work_struct delete_work;
Hannes Reinecke 849c71
+	wait_queue_head_t state_wq;
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 	struct nvme_subsystem *subsys;
Hannes Reinecke 849c71
 	struct list_head subsys_entry;
Hannes Reinecke 849c71
@@ -448,6 +450,7 @@ void nvme_complete_rq(struct request *req);
Hannes Reinecke 849c71
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
Hannes Reinecke 849c71
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
Hannes Reinecke 849c71
 		enum nvme_ctrl_state new_state);
Hannes Reinecke 849c71
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
@@ -498,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
Hannes Reinecke 849c71
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
Hannes Reinecke 849c71
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
Hannes Reinecke 849c71
index e7e79d6af9ba..4b181969c432 100644
Hannes Reinecke 849c71
--- a/drivers/nvme/host/pci.c
Hannes Reinecke 849c71
+++ b/drivers/nvme/host/pci.c
Hannes Reinecke 849c71
@@ -2463,6 +2463,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
Hannes Reinecke 849c71
 	mutex_unlock(&dev->shutdown_lock);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
Hannes Reinecke 849c71
+{
Hannes Reinecke 849c71
+	if (!nvme_wait_reset(&dev->ctrl))
Hannes Reinecke 849c71
+		return -EBUSY;
Hannes Reinecke 849c71
+	nvme_dev_disable(dev, shutdown);
Hannes Reinecke 849c71
+	return 0;
Hannes Reinecke 849c71
+}
Hannes Reinecke 849c71
+
Hannes Reinecke 849c71
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
Hannes Reinecke 849c71
@@ -2510,6 +2518,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
+	/*
Hannes Reinecke 849c71
+	 * Set state to deleting now to avoid blocking nvme_wait_reset(), which
Hannes Reinecke 849c71
+	 * may be holding this pci_dev's device lock.
Hannes Reinecke 849c71
+	 */
Hannes Reinecke 849c71
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
Hannes Reinecke 849c71
 	nvme_get_ctrl(&dev->ctrl);
Hannes Reinecke 849c71
 	nvme_dev_disable(dev, false);
Hannes Reinecke 849c71
 	nvme_kill_queues(&dev->ctrl);
Hannes Reinecke 849c71
@@ -2835,19 +2848,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
Hannes Reinecke 849c71
 static void nvme_reset_prepare(struct pci_dev *pdev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
Hannes Reinecke 849c71
-	nvme_dev_disable(dev, false);
Hannes Reinecke 849c71
+
Hannes Reinecke 849c71
+	/*
Hannes Reinecke 849c71
+	 * We don't need to check the return value from waiting for the reset
Hannes Reinecke 849c71
+	 * state as pci_dev device lock is held, making it impossible to race
Hannes Reinecke 849c71
+	 * with ->remove().
Hannes Reinecke 849c71
+	 */
Hannes Reinecke 849c71
+	nvme_disable_prepare_reset(dev, false);
Hannes Reinecke 849c71
+	nvme_sync_queues(&dev->ctrl);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 static void nvme_reset_done(struct pci_dev *pdev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
Hannes Reinecke 849c71
-	nvme_reset_ctrl_sync(&dev->ctrl);
Hannes Reinecke 849c71
+
Hannes Reinecke 849c71
+	if (!nvme_try_sched_reset(&dev->ctrl))
Hannes Reinecke 849c71
+		flush_work(&dev->ctrl.reset_work);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 static void nvme_shutdown(struct pci_dev *pdev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	struct nvme_dev *dev = pci_get_drvdata(pdev);
Hannes Reinecke 849c71
-	nvme_dev_disable(dev, true);
Hannes Reinecke 849c71
+	nvme_disable_prepare_reset(dev, true);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 /*
Hannes Reinecke 849c71
@@ -2900,7 +2922,7 @@ static int nvme_resume(struct device *dev)
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 	if (ndev->last_ps == U32_MAX ||
Hannes Reinecke 849c71
 	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
Hannes Reinecke 849c71
-		nvme_reset_ctrl(ctrl);
Hannes Reinecke 849c71
+		return nvme_try_sched_reset(&ndev->ctrl);
Hannes Reinecke 849c71
 	return 0;
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
@@ -2928,10 +2950,8 @@ static int nvme_suspend(struct device *dev)
Hannes Reinecke 849c71
 	 */
Hannes Reinecke 849c71
 	if (pm_suspend_via_firmware() || !ctrl->npss ||
Hannes Reinecke 849c71
 	    !pcie_aspm_enabled(pdev) ||
Hannes Reinecke 849c71
-	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
Hannes Reinecke 849c71
-		nvme_dev_disable(ndev, true);
Hannes Reinecke 849c71
-		return 0;
Hannes Reinecke 849c71
-	}
Hannes Reinecke 849c71
+	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
Hannes Reinecke 849c71
+		return nvme_disable_prepare_reset(ndev, true);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 	nvme_start_freeze(ctrl);
Hannes Reinecke 849c71
 	nvme_wait_freeze(ctrl);
Hannes Reinecke 849c71
@@ -2963,9 +2983,8 @@ static int nvme_suspend(struct device *dev)
Hannes Reinecke 849c71
 		 * Clearing npss forces a controller reset on resume. The
Hannes Reinecke 849c71
 		 * correct value will be resdicovered then.
Hannes Reinecke 849c71
 		 */
Hannes Reinecke 849c71
-		nvme_dev_disable(ndev, true);
Hannes Reinecke 849c71
+		ret = nvme_disable_prepare_reset(ndev, true);
Hannes Reinecke 849c71
 		ctrl->npss = 0;
Hannes Reinecke 849c71
-		ret = 0;
Hannes Reinecke 849c71
 	}
Hannes Reinecke 849c71
 unfreeze:
Hannes Reinecke 849c71
 	nvme_unfreeze(ctrl);
Hannes Reinecke 849c71
@@ -2975,9 +2994,7 @@ static int nvme_suspend(struct device *dev)
Hannes Reinecke 849c71
 static int nvme_simple_suspend(struct device *dev)
Hannes Reinecke 849c71
 {
Hannes Reinecke 849c71
 	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
Hannes Reinecke 849c71
-
Hannes Reinecke 849c71
-	nvme_dev_disable(ndev, true);
Hannes Reinecke 849c71
-	return 0;
Hannes Reinecke 849c71
+	return nvme_disable_prepare_reset(ndev, true);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 static int nvme_simple_resume(struct device *dev)
Hannes Reinecke 849c71
@@ -2985,8 +3002,7 @@ static int nvme_simple_resume(struct device *dev)
Hannes Reinecke 849c71
 	struct pci_dev *pdev = to_pci_dev(dev);
Hannes Reinecke 849c71
 	struct nvme_dev *ndev = pci_get_drvdata(pdev);
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
-	nvme_reset_ctrl(&ndev->ctrl);
Hannes Reinecke 849c71
-	return 0;
Hannes Reinecke 849c71
+	return nvme_try_sched_reset(&ndev->ctrl);
Hannes Reinecke 849c71
 }
Hannes Reinecke 849c71
 
Hannes Reinecke 849c71
 static const struct dev_pm_ops nvme_dev_pm_ops = {
Hannes Reinecke 849c71
-- 
Hannes Reinecke 849c71
2.16.4
Hannes Reinecke 849c71