Blob Blame History Raw
From: Chris Leech <cleech@redhat.com>
Date: Mon, 21 Mar 2022 15:43:04 -0700
Subject: nvme: fix RCU hole that allowed for endless looping in multipath
 round robin
Patch-mainline: v5.18-rc1
Git-commit: d6d6742772d712ed2238f5071b96baf4924f5fad
References: bsc#1202636

Make nvme_ns_remove match the assumptions elsewhere.

1) !NVME_NS_READY needs to be srcu synchronized to make sure nothing is
   running in __nvme_find_path or nvme_round_robin_path that will
   re-assign this ns to current_path.

2) Any matching current_path entries need to be cleared before removing
   from the siblings list, to prevent calling nvme_round_robin_path with
   an "old" ns that's off list.

3) Finally the list_del_rcu can happen, and then synchronize again
   before releasing any reference counts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/nvme/host/core.c |   14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3947,6 +3947,16 @@ static void nvme_ns_remove(struct nvme_n
 	set_capacity(ns->disk, 0);
 	nvme_fault_inject_fini(&ns->fault_inject);
 
+	/*
+	 * Ensure that !NVME_NS_READY is seen by other threads to prevent
+	 * this ns going back into current_path.
+	 */
+	synchronize_srcu(&ns->head->srcu);
+
+	/* wait for concurrent submissions */
+	if (nvme_mpath_clear_current_path(ns))
+		synchronize_srcu(&ns->head->srcu);
+
 	mutex_lock(&ns->ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
 	if (list_empty(&ns->head->list)) {
@@ -3958,10 +3968,6 @@ static void nvme_ns_remove(struct nvme_n
 	/* guarantee not available in head->list */
 	synchronize_rcu();
 
-	/* wait for concurrent submissions */
-	if (nvme_mpath_clear_current_path(ns))
-		synchronize_srcu(&ns->head->srcu);
-
 	if (!nvme_ns_head_multipath(ns->head))
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 	del_gendisk(ns->disk);