Blob Blame History Raw
From: James Smart <james.smart@broadcom.com>
Date: Sun, 15 Nov 2020 11:26:36 -0800
Subject: scsi: lpfc: Unsolicited ELS leaves node in incorrect state while
 dropping it
Patch-mainline: v5.11-rc1
Git-commit: 9d76d46751594264a91387583fef49af334ccea6
References: bsc#1164780

When a target swap happens, under certain conditions the node sends a
LOGO. The unsolicited ELS logic responds with a reject. The logic may
allocate a new node to handle this. Afterward, the new nodes are dropped
incorrectly leaving them in a mis-matched state and refcounting causes a
use-after-free situation leading to a crash.

It is also possible that the unsolicited els handling finds a node which is
in an UNUSED state. The handling moves these nodes to NPR state with a
refcount of 1. Although the end of the discovery logic assumes a final put
will free such a node, there are codes paths which could increment the
reference count, thus the node is in NPR state and not released.
Eventually this mismatch in state and refcount leads to premature release
of the node causing a crash.

Fix by always using the discovery engine DEVICE RM event to decrement and
release the nodes (rather than explicit code that tried to do it before).
This will take care of moving the node to the UNUSED state and then removes
the final ref count. If there is a trigger to reuse this node, the
transition from the UNUSED state clearly indicates that the initial
reference is then incremented and use can continue.

Link: https://lore.kernel.org/r/20201115192646.12977-8-james.smart@broadcom.com
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Daniel Wagner <dwagner@suse.de>
---
 drivers/scsi/lpfc/lpfc_els.c |   33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -8728,7 +8728,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 
 		lpfc_els_rcv_flogi(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_LOGO:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8770,7 +8771,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRSCN++;
 		lpfc_els_rcv_rscn(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_ADISC:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8848,7 +8850,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvLIRR++;
 		lpfc_els_rcv_lirr(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_RLS:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8858,7 +8861,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRLS++;
 		lpfc_els_rcv_rls(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_RPL:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8868,7 +8872,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRPL++;
 		lpfc_els_rcv_rpl(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_RNID:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8878,7 +8883,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRNID++;
 		lpfc_els_rcv_rnid(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_RTV:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8887,7 +8893,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRTV++;
 		lpfc_els_rcv_rtv(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_RRQ:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8897,7 +8904,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvRRQ++;
 		lpfc_els_rcv_rrq(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_ECHO:
 		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8907,7 +8915,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 		phba->fc_stat.elsRcvECHO++;
 		lpfc_els_rcv_echo(vport, elsiocb, ndlp);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	case ELS_CMD_REC:
 		/* receive this due to exchange closed */
@@ -8938,7 +8947,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 				 "0115 Unknown ELS command x%x "
 				 "received from NPORT x%x\n", cmd, did);
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 		break;
 	}
 
@@ -8952,7 +8962,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *p
 				    NULL);
 		/* Remove the reference from above for new nodes. */
 		if (newnode)
-			lpfc_nlp_put(ndlp);
+			lpfc_disc_state_machine(vport, ndlp, NULL,
+					NLP_EVT_DEVICE_RM);
 	}
 
 	/* Release the reference on this elsiocb, not the ndlp. */