Hannes Reinecke 4805fd
From: Sagi Grimberg <sagi@grimberg.me>
Hannes Reinecke 4805fd
Date: Wed, 22 Jul 2020 16:32:19 -0700
Hannes Reinecke 4805fd
Subject: [PATCH] nvme: fix deadlock in disconnect during scan_work and/or
Hannes Reinecke 4805fd
 ana_work
Hannes Reinecke 4805fd
Git-commit: ecca390e80561debbfdb4dc96bf94595136889fa
Hannes Reinecke 4805fd
Patch-mainline: v5.9-rc1
Hannes Reinecke 4805fd
References: git-fixes
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
A deadlock happens in the following scenario with multipath:
Hannes Reinecke 4805fd
1) scan_work(nvme0) detects a new nsid while nvme0
Hannes Reinecke 4805fd
    is an optimized path to it, path nvme1 happens to be
Hannes Reinecke 4805fd
    inaccessible.
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
2) Before scan_work is complete nvme0 disconnect is initiated
Hannes Reinecke 4805fd
    nvme_delete_ctrl_sync() sets nvme0 state to NVME_CTRL_DELETING
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
3) scan_work(1) attempts to submit IO,
Hannes Reinecke 4805fd
    but nvme_path_is_optimized() observes nvme0 is not LIVE.
Hannes Reinecke 4805fd
    Since nvme1 is a possible path IO is requeued and scan_work hangs.
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
--
Hannes Reinecke 4805fd
Workqueue: nvme-wq nvme_scan_work [nvme_core]
Hannes Reinecke 4805fd
kernel: Call Trace:
Hannes Reinecke 4805fd
kernel:  __schedule+0x2b9/0x6c0
Hannes Reinecke 4805fd
kernel:  schedule+0x42/0xb0
Hannes Reinecke 4805fd
kernel:  io_schedule+0x16/0x40
Hannes Reinecke 4805fd
kernel:  do_read_cache_page+0x438/0x830
Hannes Reinecke 4805fd
kernel:  read_cache_page+0x12/0x20
Hannes Reinecke 4805fd
kernel:  read_dev_sector+0x27/0xc0
Hannes Reinecke 4805fd
kernel:  read_lba+0xc1/0x220
Hannes Reinecke 4805fd
kernel:  efi_partition+0x1e6/0x708
Hannes Reinecke 4805fd
kernel:  check_partition+0x154/0x244
Hannes Reinecke 4805fd
kernel:  rescan_partitions+0xae/0x280
Hannes Reinecke 4805fd
kernel:  __blkdev_get+0x40f/0x560
Hannes Reinecke 4805fd
kernel:  blkdev_get+0x3d/0x140
Hannes Reinecke 4805fd
kernel:  __device_add_disk+0x388/0x480
Hannes Reinecke 4805fd
kernel:  device_add_disk+0x13/0x20
Hannes Reinecke 4805fd
kernel:  nvme_mpath_set_live+0x119/0x140 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_update_ns_ana_state+0x5c/0x60 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_set_ns_ana_state+0x1e/0x30 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_parse_ana_log+0xa1/0x180 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_mpath_add_disk+0x47/0x90 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_validate_ns+0x396/0x940 [nvme_core]
Hannes Reinecke 4805fd
kernel:  nvme_scan_work+0x24f/0x380 [nvme_core]
Hannes Reinecke 4805fd
kernel:  process_one_work+0x1db/0x380
Hannes Reinecke 4805fd
kernel:  worker_thread+0x249/0x400
Hannes Reinecke 4805fd
kernel:  kthread+0x104/0x140
Hannes Reinecke 4805fd
--
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
4) Delete also hangs in flush_work(ctrl->scan_work)
Hannes Reinecke 4805fd
    from nvme_remove_namespaces().
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
Similarly a deadlock with ana_work may happen: if ana_work has started
Hannes Reinecke 4805fd
and calls nvme_mpath_set_live and device_add_disk, it will
Hannes Reinecke 4805fd
trigger I/O. When we trigger disconnect I/O will block because
Hannes Reinecke 4805fd
our accessible (optimized) path is disconnecting, but the alternate
Hannes Reinecke 4805fd
path is inaccessible, so I/O blocks. Then disconnect tries to flush
Hannes Reinecke 4805fd
the ana_work and hangs.
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
[  605.550896] Workqueue: nvme-wq nvme_ana_work [nvme_core]
Hannes Reinecke 4805fd
[  605.552087] Call Trace:
Hannes Reinecke 4805fd
[  605.552683]  __schedule+0x2b9/0x6c0
Hannes Reinecke 4805fd
[  605.553507]  schedule+0x42/0xb0
Hannes Reinecke 4805fd
[  605.554201]  io_schedule+0x16/0x40
Hannes Reinecke 4805fd
[  605.555012]  do_read_cache_page+0x438/0x830
Hannes Reinecke 4805fd
[  605.556925]  read_cache_page+0x12/0x20
Hannes Reinecke 4805fd
[  605.557757]  read_dev_sector+0x27/0xc0
Hannes Reinecke 4805fd
[  605.558587]  amiga_partition+0x4d/0x4c5
Hannes Reinecke 4805fd
[  605.561278]  check_partition+0x154/0x244
Hannes Reinecke 4805fd
[  605.562138]  rescan_partitions+0xae/0x280
Hannes Reinecke 4805fd
[  605.563076]  __blkdev_get+0x40f/0x560
Hannes Reinecke 4805fd
[  605.563830]  blkdev_get+0x3d/0x140
Hannes Reinecke 4805fd
[  605.564500]  __device_add_disk+0x388/0x480
Hannes Reinecke 4805fd
[  605.565316]  device_add_disk+0x13/0x20
Hannes Reinecke 4805fd
[  605.566070]  nvme_mpath_set_live+0x5e/0x130 [nvme_core]
Hannes Reinecke 4805fd
[  605.567114]  nvme_update_ns_ana_state+0x2c/0x30 [nvme_core]
Hannes Reinecke 4805fd
[  605.568197]  nvme_update_ana_state+0xca/0xe0 [nvme_core]
Hannes Reinecke 4805fd
[  605.569360]  nvme_parse_ana_log+0xa1/0x180 [nvme_core]
Hannes Reinecke 4805fd
[  605.571385]  nvme_read_ana_log+0x76/0x100 [nvme_core]
Hannes Reinecke 4805fd
[  605.572376]  nvme_ana_work+0x15/0x20 [nvme_core]
Hannes Reinecke 4805fd
[  605.573330]  process_one_work+0x1db/0x380
Hannes Reinecke 4805fd
[  605.574144]  worker_thread+0x4d/0x400
Hannes Reinecke 4805fd
[  605.574896]  kthread+0x104/0x140
Hannes Reinecke 4805fd
[  605.577205]  ret_from_fork+0x35/0x40
Hannes Reinecke 4805fd
[  605.577955] INFO: task nvme:14044 blocked for more than 120 seconds.
Hannes Reinecke 4805fd
[  605.579239]       Tainted: G           OE     5.3.5-050305-generic #201910071830
Hannes Reinecke 4805fd
[  605.580712] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
Hannes Reinecke 4805fd
[  605.582320] nvme            D    0 14044  14043 0x00000000
Hannes Reinecke 4805fd
[  605.583424] Call Trace:
Hannes Reinecke 4805fd
[  605.583935]  __schedule+0x2b9/0x6c0
Hannes Reinecke 4805fd
[  605.584625]  schedule+0x42/0xb0
Hannes Reinecke 4805fd
[  605.585290]  schedule_timeout+0x203/0x2f0
Hannes Reinecke 4805fd
[  605.588493]  wait_for_completion+0xb1/0x120
Hannes Reinecke 4805fd
[  605.590066]  __flush_work+0x123/0x1d0
Hannes Reinecke 4805fd
[  605.591758]  __cancel_work_timer+0x10e/0x190
Hannes Reinecke 4805fd
[  605.593542]  cancel_work_sync+0x10/0x20
Hannes Reinecke 4805fd
[  605.594347]  nvme_mpath_stop+0x2f/0x40 [nvme_core]
Hannes Reinecke 4805fd
[  605.595328]  nvme_stop_ctrl+0x12/0x50 [nvme_core]
Hannes Reinecke 4805fd
[  605.596262]  nvme_do_delete_ctrl+0x3f/0x90 [nvme_core]
Hannes Reinecke 4805fd
[  605.597333]  nvme_sysfs_delete+0x5c/0x70 [nvme_core]
Hannes Reinecke 4805fd
[  605.598320]  dev_attr_store+0x17/0x30
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
Fix this by introducing a new state: NVME_CTRL_DELETING_NOIO, which will
Hannes Reinecke 4805fd
indicate the phase of controller deletion where I/O cannot be allowed
Hannes Reinecke 4805fd
to access the namespace. NVME_CTRL_DELETING still allows mpath I/O to
Hannes Reinecke 4805fd
be issued to the bottom device, and only after we flush the ana_work
Hannes Reinecke 4805fd
and scan_work (after nvme_stop_ctrl and nvme_prep_remove_namespaces)
Hannes Reinecke 4805fd
we change the state to NVME_CTRL_DELETING_NOIO. Also we prevent ana_work
Hannes Reinecke 4805fd
from re-firing by aborting early if we are not LIVE, so we should be safe
Hannes Reinecke 4805fd
here.
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
In addition, change the transport drivers to follow the updated state
Hannes Reinecke 4805fd
machine.
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
Fixes: 0d0b660f214d ("nvme: add ANA support")
Hannes Reinecke 4805fd
Reported-by: Anton Eidelman <anton@lightbitslabs.com>
Hannes Reinecke 4805fd
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Hannes Reinecke 4805fd
Signed-off-by: Christoph Hellwig <hch@lst.de>
Hannes Reinecke 4805fd
Acked-by: Hannes Reinecke <hare@suse.com>
Hannes Reinecke 4805fd
---
Hannes Reinecke 4805fd
 drivers/nvme/host/core.c      | 15 +++++++++++++++
Hannes Reinecke 4805fd
 drivers/nvme/host/fabrics.c   |  2 +-
Hannes Reinecke 4805fd
 drivers/nvme/host/fabrics.h   |  3 ++-
Hannes Reinecke 4805fd
 drivers/nvme/host/fc.c        |  1 +
Hannes Reinecke 4805fd
 drivers/nvme/host/multipath.c | 18 +++++++++++++++---
Hannes Reinecke 4805fd
 drivers/nvme/host/nvme.h      |  6 ++++++
Hannes Reinecke 4805fd
 drivers/nvme/host/rdma.c      | 10 ++++++----
Hannes Reinecke 4805fd
 drivers/nvme/host/tcp.c       | 15 +++++++++------
Hannes Reinecke 4805fd
 8 files changed, 55 insertions(+), 15 deletions(-)
Hannes Reinecke 4805fd
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
Hannes Reinecke 4805fd
index 1d7c7afb1348..c16bfdff2953 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/core.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/core.c
Hannes Reinecke 4805fd
@@ -366,6 +366,16 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
Hannes Reinecke 4805fd
 			break;
Hannes Reinecke 4805fd
 		}
Hannes Reinecke 4805fd
 		break;
Hannes Reinecke 4805fd
+	case NVME_CTRL_DELETING_NOIO:
Hannes Reinecke 4805fd
+		switch (old_state) {
Hannes Reinecke 4805fd
+		case NVME_CTRL_DELETING:
Hannes Reinecke 4805fd
+		case NVME_CTRL_DEAD:
Hannes Reinecke 4805fd
+			changed = true;
Hannes Reinecke 4805fd
+			/* FALLTHRU */
Hannes Reinecke 4805fd
+		default:
Hannes Reinecke 4805fd
+			break;
Hannes Reinecke 4805fd
+		}
Hannes Reinecke 4805fd
+		break;
Hannes Reinecke 4805fd
 	case NVME_CTRL_DEAD:
Hannes Reinecke 4805fd
 		switch (old_state) {
Hannes Reinecke 4805fd
 		case NVME_CTRL_DELETING:
Hannes Reinecke 4805fd
@@ -403,6 +413,7 @@ static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
Hannes Reinecke 4805fd
 	case NVME_CTRL_CONNECTING:
Hannes Reinecke 4805fd
 		return false;
Hannes Reinecke 4805fd
 	case NVME_CTRL_DELETING:
Hannes Reinecke 4805fd
+	case NVME_CTRL_DELETING_NOIO:
Hannes Reinecke 4805fd
 	case NVME_CTRL_DEAD:
Hannes Reinecke 4805fd
 		return true;
Hannes Reinecke 4805fd
 	default:
Hannes Reinecke 4805fd
@@ -3476,6 +3487,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
Hannes Reinecke 4805fd
 		[NVME_CTRL_RESETTING]	= "resetting",
Hannes Reinecke 4805fd
 		[NVME_CTRL_CONNECTING]	= "connecting",
Hannes Reinecke 4805fd
 		[NVME_CTRL_DELETING]	= "deleting",
Hannes Reinecke 4805fd
+		[NVME_CTRL_DELETING_NOIO]= "deleting (no IO)",
Hannes Reinecke 4805fd
 		[NVME_CTRL_DEAD]	= "dead",
Hannes Reinecke 4805fd
 	};
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
@@ -4112,6 +4124,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
Hannes Reinecke 4805fd
 	if (ctrl->state == NVME_CTRL_DEAD)
Hannes Reinecke 4805fd
 		nvme_kill_queues(ctrl);
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
+	/* this is a no-op when called from the controller reset handler */
Hannes Reinecke 4805fd
+	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
+
Hannes Reinecke 4805fd
 	down_write(&ctrl->namespaces_rwsem);
Hannes Reinecke 4805fd
 	list_splice_init(&ctrl->namespaces, &ns_list);
Hannes Reinecke 4805fd
 	up_write(&ctrl->namespaces_rwsem);
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
Hannes Reinecke 4805fd
index 2a6c8190eeb7..4ec4829d6233 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/fabrics.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/fabrics.c
Hannes Reinecke 4805fd
@@ -547,7 +547,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
Hannes Reinecke 4805fd
 blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
Hannes Reinecke 4805fd
 		struct request *rq)
Hannes Reinecke 4805fd
 {
Hannes Reinecke 4805fd
-	if (ctrl->state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
Hannes Reinecke 4805fd
 	    ctrl->state != NVME_CTRL_DEAD &&
Hannes Reinecke 4805fd
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
Hannes Reinecke 4805fd
 		return BLK_STS_RESOURCE;
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
Hannes Reinecke 4805fd
index a0ec40ab62ee..a9c1e3b4585e 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/fabrics.h
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/fabrics.h
Hannes Reinecke 4805fd
@@ -182,7 +182,8 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
Hannes Reinecke 4805fd
 static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
Hannes Reinecke 4805fd
 		bool queue_live)
Hannes Reinecke 4805fd
 {
Hannes Reinecke 4805fd
-	if (likely(ctrl->state == NVME_CTRL_LIVE))
Hannes Reinecke 4805fd
+	if (likely(ctrl->state == NVME_CTRL_LIVE ||
Hannes Reinecke 4805fd
+		   ctrl->state == NVME_CTRL_DELETING))
Hannes Reinecke 4805fd
 		return true;
Hannes Reinecke 4805fd
 	return __nvmf_check_ready(ctrl, rq, queue_live);
Hannes Reinecke 4805fd
 }
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
Hannes Reinecke 4805fd
index 6aa30bb5a762..b27c54dc6683 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/fc.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/fc.c
Hannes Reinecke 4805fd
@@ -826,6 +826,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
Hannes Reinecke 4805fd
 		break;
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 	case NVME_CTRL_DELETING:
Hannes Reinecke 4805fd
+	case NVME_CTRL_DELETING_NOIO:
Hannes Reinecke 4805fd
 	default:
Hannes Reinecke 4805fd
 		/* no action to take - let it delete */
Hannes Reinecke 4805fd
 		break;
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
Hannes Reinecke 4805fd
index 74bad4e3d377..900b35d47ec7 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/multipath.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/multipath.c
Hannes Reinecke 4805fd
@@ -167,9 +167,18 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
Hannes Reinecke 4805fd
 {
Hannes Reinecke 4805fd
-	return ns->ctrl->state != NVME_CTRL_LIVE ||
Hannes Reinecke 4805fd
-		test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
Hannes Reinecke 4805fd
-		test_bit(NVME_NS_REMOVING, &ns->flags);
Hannes Reinecke 4805fd
+	/*
Hannes Reinecke 4805fd
+	 * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
Hannes Reinecke 4805fd
+	 * still be able to complete assuming that the controller is connected.
Hannes Reinecke 4805fd
+	 * Otherwise it will fail immediately and return to the requeue list.
Hannes Reinecke 4805fd
+	 */
Hannes Reinecke 4805fd
+	if (ns->ctrl->state != NVME_CTRL_LIVE &&
Hannes Reinecke 4805fd
+	    ns->ctrl->state != NVME_CTRL_DELETING)
Hannes Reinecke 4805fd
+		return true;
Hannes Reinecke 4805fd
+	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
Hannes Reinecke 4805fd
+	    test_bit(NVME_NS_REMOVING, &ns->flags))
Hannes Reinecke 4805fd
+		return true;
Hannes Reinecke 4805fd
+	return false;
Hannes Reinecke 4805fd
 }
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
Hannes Reinecke 4805fd
@@ -563,6 +572,9 @@ static void nvme_ana_work(struct work_struct *work)
Hannes Reinecke 4805fd
 {
Hannes Reinecke 4805fd
 	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
+	if (ctrl->state != NVME_CTRL_LIVE)
Hannes Reinecke 4805fd
+		return;
Hannes Reinecke 4805fd
+
Hannes Reinecke 4805fd
 	nvme_read_ana_log(ctrl);
Hannes Reinecke 4805fd
 }
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
Hannes Reinecke 4805fd
index 92629758b77c..1609267a1f0e 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/nvme.h
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/nvme.h
Hannes Reinecke 4805fd
@@ -190,6 +190,11 @@ static inline u16 nvme_req_qid(struct request *req)
Hannes Reinecke 4805fd
  * @NVME_CTRL_CONNECTING:	Controller is disconnected, now connecting the
Hannes Reinecke 4805fd
  *				transport
Hannes Reinecke 4805fd
  * @NVME_CTRL_DELETING:		Controller is deleting (or scheduled deletion)
Hannes Reinecke 4805fd
+ * @NVME_CTRL_DELETING_NOIO:	Controller is deleting and I/O is not
Hannes Reinecke 4805fd
+ *				disabled/failed immediately. This state comes
Hannes Reinecke 4805fd
+ * 				after all async event processing took place and
Hannes Reinecke 4805fd
+ * 				before ns removal and the controller deletion
Hannes Reinecke 4805fd
+ * 				progress
Hannes Reinecke 4805fd
  * @NVME_CTRL_DEAD:		Controller is non-present/unresponsive during
Hannes Reinecke 4805fd
  *				shutdown or removal. In this case we forcibly
Hannes Reinecke 4805fd
  *				kill all inflight I/O as they have no chance to
Hannes Reinecke 4805fd
@@ -201,6 +206,7 @@ enum nvme_ctrl_state {
Hannes Reinecke 4805fd
 	NVME_CTRL_RESETTING,
Hannes Reinecke 4805fd
 	NVME_CTRL_CONNECTING,
Hannes Reinecke 4805fd
 	NVME_CTRL_DELETING,
Hannes Reinecke 4805fd
+	NVME_CTRL_DELETING_NOIO,
Hannes Reinecke 4805fd
 	NVME_CTRL_DEAD,
Hannes Reinecke 4805fd
 };
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
Hannes Reinecke 4805fd
index 467da08db309..5c3848974ccb 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/rdma.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/rdma.c
Hannes Reinecke 4805fd
@@ -1102,11 +1102,12 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
Hannes Reinecke 4805fd
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
Hannes Reinecke 4805fd
 	if (!changed) {
Hannes Reinecke 4805fd
 		/*
Hannes Reinecke 4805fd
-		 * state change failure is ok if we're in DELETING state,
Hannes Reinecke 4805fd
+		 * state change failure is ok if we started ctrl delete,
Hannes Reinecke 4805fd
 		 * unless we're during creation of a new controller to
Hannes Reinecke 4805fd
 		 * avoid races with teardown flow.
Hannes Reinecke 4805fd
 		 */
Hannes Reinecke 4805fd
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
Hannes Reinecke 4805fd
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
 		WARN_ON_ONCE(new);
Hannes Reinecke 4805fd
 		ret = -EINVAL;
Hannes Reinecke 4805fd
 		goto destroy_io;
Hannes Reinecke 4805fd
@@ -1159,8 +1160,9 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
Hannes Reinecke 4805fd
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
Hannes Reinecke 4805fd
-		/* state change failure is ok if we're in DELETING state */
Hannes Reinecke 4805fd
-		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
Hannes Reinecke 4805fd
+		/* state change failure is ok if we started ctrl delete */
Hannes Reinecke 4805fd
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
 		return;
Hannes Reinecke 4805fd
 	}
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
Hannes Reinecke 4805fd
index b2e73e19ef01..8c8fb65ca928 100644
Hannes Reinecke 4805fd
--- a/drivers/nvme/host/tcp.c
Hannes Reinecke 4805fd
+++ b/drivers/nvme/host/tcp.c
Hannes Reinecke 4805fd
@@ -1950,11 +1950,12 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
Hannes Reinecke 4805fd
 		/*
Hannes Reinecke 4805fd
-		 * state change failure is ok if we're in DELETING state,
Hannes Reinecke 4805fd
+		 * state change failure is ok if we started ctrl delete,
Hannes Reinecke 4805fd
 		 * unless we're during creation of a new controller to
Hannes Reinecke 4805fd
 		 * avoid races with teardown flow.
Hannes Reinecke 4805fd
 		 */
Hannes Reinecke 4805fd
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
Hannes Reinecke 4805fd
+		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+			     ctrl->state != NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
 		WARN_ON_ONCE(new);
Hannes Reinecke 4805fd
 		ret = -EINVAL;
Hannes Reinecke 4805fd
 		goto destroy_io;
Hannes Reinecke 4805fd
@@ -2010,8 +2011,9 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
Hannes Reinecke 4805fd
 	blk_mq_unquiesce_queue(ctrl->admin_q);
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Hannes Reinecke 4805fd
-		/* state change failure is ok if we're in DELETING state */
Hannes Reinecke 4805fd
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
Hannes Reinecke 4805fd
+		/* state change failure is ok if we started ctrl delete */
Hannes Reinecke 4805fd
+		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+			     ctrl->state != NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
 		return;
Hannes Reinecke 4805fd
 	}
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
@@ -2046,8 +2048,9 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
Hannes Reinecke 4805fd
 	nvme_tcp_teardown_ctrl(ctrl, false);
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
Hannes Reinecke 4805fd
-		/* state change failure is ok if we're in DELETING state */
Hannes Reinecke 4805fd
-		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
Hannes Reinecke 4805fd
+		/* state change failure is ok if we started ctrl delete */
Hannes Reinecke 4805fd
+		WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
Hannes Reinecke 4805fd
+			     ctrl->state != NVME_CTRL_DELETING_NOIO);
Hannes Reinecke 4805fd
 		return;
Hannes Reinecke 4805fd
 	}
Hannes Reinecke 4805fd
 
Hannes Reinecke 4805fd
-- 
Hannes Reinecke 4805fd
2.29.2
Hannes Reinecke 4805fd