Blob Blame History Raw
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 28 Jan 2019 11:14:25 -0800
Subject: [PATCH] scsi: lpfc: Move SCSI and NVME Stats to hardware queue
 structures
Git-commit: 4c47efc140fa926f00aa59c248458d95bd7b5eab
Patch-mainline: v5.1-rc1
References: bsc#1123236

Many io statistics were being sampled and saved using adapter-based data
structures. This was creating a lot of contention and cache thrashing in
the I/O path.

Move the statistics to the hardware queue data structures.  Given the
per-queue data structures, use of atomic types is lessened.

Add new sysfs and debugfs stat routines to collate the per hardware queue
values and report at an adapter level.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/lpfc/lpfc.h         |   9 +--
 drivers/scsi/lpfc/lpfc_attr.c    |  68 ++++++++++++++---
 drivers/scsi/lpfc/lpfc_debugfs.c | 158 +++++++++++++++++++++++++++++++++++++--
 drivers/scsi/lpfc/lpfc_debugfs.h |   3 +
 drivers/scsi/lpfc/lpfc_init.c    |  40 ++++++----
 drivers/scsi/lpfc/lpfc_nvme.c    |  57 +++++---------
 drivers/scsi/lpfc/lpfc_nvme.h    |  11 +--
 drivers/scsi/lpfc/lpfc_scsi.c    |  47 ++++++++----
 drivers/scsi/lpfc/lpfc_scsi.h    |   3 +
 drivers/scsi/lpfc/lpfc_sli4.h    |  11 +++
 10 files changed, 304 insertions(+), 103 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 310437b6b51a..9262c52e32d6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -479,6 +479,7 @@ struct lpfc_vport {
 	struct dentry *debug_disc_trc;
 	struct dentry *debug_nodelist;
 	struct dentry *debug_nvmestat;
+	struct dentry *debug_scsistat;
 	struct dentry *debug_nvmektime;
 	struct dentry *debug_cpucheck;
 	struct dentry *vport_debugfs_root;
@@ -946,14 +947,6 @@ struct lpfc_hba {
 	struct timer_list eratt_poll;
 	uint32_t eratt_poll_interval;
 
-	/*
-	 * stat  counters
-	 */
-	atomic_t fc4ScsiInputRequests;
-	atomic_t fc4ScsiOutputRequests;
-	atomic_t fc4ScsiControlRequests;
-	atomic_t fc4ScsiIoCmpls;
-
 	uint64_t bg_guard_err_cnt;
 	uint64_t bg_apptag_err_cnt;
 	uint64_t bg_reftag_err_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 1671d9371d3b..e10d930fcb6a 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -64,9 +64,6 @@
 #define LPFC_MIN_MRQ_POST	512
 #define LPFC_MAX_MRQ_POST	2048
 
-#define LPFC_MAX_NVME_INFO_TMP_LEN	100
-#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
-
 /*
  * Write key size should be multiple of 4. If write key is changed
  * make sure that library write key is also changed.
@@ -155,7 +152,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	struct lpfc_nvme_rport *rport;
 	struct lpfc_nodelist *ndlp;
 	struct nvme_fc_remote_port *nrport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	uint64_t data1, data2, data3;
 	uint64_t totin, totout, tot;
 	char *statep;
@@ -457,12 +454,12 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	totin = 0;
 	totout = 0;
 	for (i = 0; i < phba->cfg_hdw_queue; i++) {
-		cstat = &lport->cstat[i];
-		tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+		cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+		tot = cstat->io_cmpls;
 		totin += tot;
-		data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-		data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-		data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
 		totout += (data1 + data2 + data3);
 	}
 	scnprintf(tmp, sizeof(tmp),
@@ -508,6 +505,57 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
 	return len;
 }
 
+static ssize_t
+lpfc_scsi_stat_show(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = shost_priv(shost);
+	struct lpfc_hba *phba = vport->phba;
+	int len;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, PAGE_SIZE, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, PAGE_SIZE);
+
+buffer_done:
+	len = strnlen(buf, PAGE_SIZE);
+
+	return len;
+}
+
 static ssize_t
 lpfc_bg_info_show(struct device *dev, struct device_attribute *attr,
 		  char *buf)
@@ -2573,6 +2621,7 @@ lpfc_##attr##_store(struct device *dev, struct device_attribute *attr, \
 
 
 static DEVICE_ATTR(nvme_info, 0444, lpfc_nvme_info_show, NULL);
+static DEVICE_ATTR(scsi_stat, 0444, lpfc_scsi_stat_show, NULL);
 static DEVICE_ATTR(bg_info, S_IRUGO, lpfc_bg_info_show, NULL);
 static DEVICE_ATTR(bg_guard_err, S_IRUGO, lpfc_bg_guard_err_show, NULL);
 static DEVICE_ATTR(bg_apptag_err, S_IRUGO, lpfc_bg_apptag_err_show, NULL);
@@ -5642,6 +5691,7 @@ LPFC_ATTR_RW(enable_dpp, 1, 0, 1, "Enable Direct Packet Push");
 
 struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_nvme_info,
+	&dev_attr_scsi_stat,
 	&dev_attr_bg_info,
 	&dev_attr_bg_guard_err,
 	&dev_attr_bg_apptag_err,
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index d84fb8c088b1..218642d7c488 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -840,7 +840,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	struct lpfc_nvmet_tgtport *tgtp;
 	struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
 	struct nvme_fc_local_port *localport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_nvme_lport *lport;
 	uint64_t data1, data2, data3;
 	uint64_t tot, totin, totout;
@@ -979,7 +979,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 			return len;
 
 		len += snprintf(buf + len, size - len,
-				"\nNVME Lport Statistics\n");
+				"\nNVME HDWQ Statistics\n");
 
 		len += snprintf(buf + len, size - len,
 				"LS: Xmt %016x Cmpl %016x\n",
@@ -993,12 +993,12 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 		totin = 0;
 		totout = 0;
 		for (i = 0; i < phba->cfg_hdw_queue; i++) {
-			cstat = &lport->cstat[i];
-			tot = atomic_read(&cstat->fc4NvmeIoCmpls);
+			cstat = &phba->sli4_hba.hdwq[i].nvme_cstat;
+			tot = cstat->io_cmpls;
 			totin += tot;
-			data1 = atomic_read(&cstat->fc4NvmeInputRequests);
-			data2 = atomic_read(&cstat->fc4NvmeOutputRequests);
-			data3 = atomic_read(&cstat->fc4NvmeControlRequests);
+			data1 = cstat->input_requests;
+			data2 = cstat->output_requests;
+			data3 = cstat->control_requests;
 			totout += (data1 + data2 + data3);
 
 			/* Limit to 32, debugfs display buffer limitation */
@@ -1006,7 +1006,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 				continue;
 
 			len += snprintf(buf + len, PAGE_SIZE - len,
-					"FCP (%d): Rd %016llx Wr %016llx "
+					"HDWQ (%d): Rd %016llx Wr %016llx "
 					"IO %016llx ",
 					i, data1, data2, data3);
 			len += snprintf(buf + len, PAGE_SIZE - len,
@@ -1046,6 +1046,66 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
 	return len;
 }
 
+/**
+ * lpfc_debugfs_scsistat_data - Dump target node list to a buffer
+ * @vport: The vport to gather target node info from.
+ * @buf: The buffer to dump log into.
+ * @size: The maximum amount of data to process.
+ *
+ * Description:
+ * This routine dumps the SCSI statistics associated with @vport
+ *
+ * Return Value:
+ * This routine returns the amount of bytes that were dumped into @buf and will
+ * not exceed @size.
+ **/
+static int
+lpfc_debugfs_scsistat_data(struct lpfc_vport *vport, char *buf, int size)
+{
+	int len;
+	struct lpfc_hba *phba = vport->phba;
+	struct lpfc_fc4_ctrl_stat *cstat;
+	u64 data1, data2, data3;
+	u64 tot, totin, totout;
+	int i;
+	char tmp[LPFC_MAX_SCSI_INFO_TMP_LEN] = {0};
+
+	if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ||
+	    (phba->sli_rev != LPFC_SLI_REV4))
+		return 0;
+
+	scnprintf(buf, size, "SCSI HDWQ Statistics\n");
+
+	totin = 0;
+	totout = 0;
+	for (i = 0; i < phba->cfg_hdw_queue; i++) {
+		cstat = &phba->sli4_hba.hdwq[i].scsi_cstat;
+		tot = cstat->io_cmpls;
+		totin += tot;
+		data1 = cstat->input_requests;
+		data2 = cstat->output_requests;
+		data3 = cstat->control_requests;
+		totout += (data1 + data2 + data3);
+
+		scnprintf(tmp, sizeof(tmp), "HDWQ (%d): Rd %016llx Wr %016llx "
+			  "IO %016llx ", i, data1, data2, data3);
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+
+		scnprintf(tmp, sizeof(tmp), "Cmpl %016llx OutIO %016llx\n",
+			  tot, ((data1 + data2 + data3) - tot));
+		if (strlcat(buf, tmp, size) >= size)
+			goto buffer_done;
+	}
+	scnprintf(tmp, sizeof(tmp), "Total FCP Cmpl %016llx Issue %016llx "
+		  "OutIO %016llx\n", totin, totout, totout - totin);
+	strlcat(buf, tmp, size);
+
+buffer_done:
+	len = strnlen(buf, size);
+
+	return len;
+}
 
 /**
  * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer
@@ -2211,6 +2271,64 @@ lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf,
 	return nbytes;
 }
 
+static int
+lpfc_debugfs_scsistat_open(struct inode *inode, struct file *file)
+{
+	struct lpfc_vport *vport = inode->i_private;
+	struct lpfc_debug *debug;
+	int rc = -ENOMEM;
+
+	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
+	if (!debug)
+		goto out;
+
+	 /* Round to page boundary */
+	debug->buffer = kzalloc(LPFC_SCSISTAT_SIZE, GFP_KERNEL);
+	if (!debug->buffer) {
+		kfree(debug);
+		goto out;
+	}
+
+	debug->len = lpfc_debugfs_scsistat_data(vport, debug->buffer,
+		LPFC_SCSISTAT_SIZE);
+
+	debug->i_private = inode->i_private;
+	file->private_data = debug;
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static ssize_t
+lpfc_debugfs_scsistat_write(struct file *file, const char __user *buf,
+			    size_t nbytes, loff_t *ppos)
+{
+	struct lpfc_debug *debug = file->private_data;
+	struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private;
+	struct lpfc_hba *phba = vport->phba;
+	char mybuf[6] = {0};
+	int i;
+
+	/* Protect copy from user */
+	if (!access_ok(VERIFY_READ, buf, nbytes))
+		return -EFAULT;
+
+	if (copy_from_user(mybuf, buf, (nbytes >= sizeof(mybuf)) ?
+				       (sizeof(mybuf) - 1) : nbytes))
+		return -EFAULT;
+
+	if ((strncmp(&mybuf[0], "reset", strlen("reset")) == 0) ||
+	    (strncmp(&mybuf[0], "zero", strlen("zero")) == 0)) {
+		for (i = 0; i < phba->cfg_hdw_queue; i++) {
+			memset(&phba->sli4_hba.hdwq[i].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[i].scsi_cstat));
+		}
+	}
+
+	return nbytes;
+}
+
 static int
 lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file)
 {
@@ -4972,6 +5090,16 @@ static const struct file_operations lpfc_debugfs_op_nvmestat = {
 	.release =      lpfc_debugfs_release,
 };
 
+#undef lpfc_debugfs_op_scsistat
+static const struct file_operations lpfc_debugfs_op_scsistat = {
+	.owner =        THIS_MODULE,
+	.open =         lpfc_debugfs_scsistat_open,
+	.llseek =       lpfc_debugfs_lseek,
+	.read =         lpfc_debugfs_read,
+	.write =	lpfc_debugfs_scsistat_write,
+	.release =      lpfc_debugfs_release,
+};
+
 #undef lpfc_debugfs_op_nvmektime
 static const struct file_operations lpfc_debugfs_op_nvmektime = {
 	.owner =        THIS_MODULE,
@@ -5612,6 +5740,17 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport)
 				    vport->vport_debugfs_root,
 				    vport, &lpfc_debugfs_op_nvmestat);
 
+	snprintf(name, sizeof(name), "scsistat");
+	vport->debug_scsistat =
+		debugfs_create_file(name, 0644,
+				    vport->vport_debugfs_root,
+				    vport, &lpfc_debugfs_op_scsistat);
+	if (!vport->debug_scsistat) {
+		lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
+				 "0811 Cannot create debugfs scsistat\n");
+		goto debug_failed;
+	}
+
 	snprintf(name, sizeof(name), "nvmektime");
 	vport->debug_nvmektime =
 		debugfs_create_file(name, 0644,
@@ -5750,6 +5889,9 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport)
 	debugfs_remove(vport->debug_nvmestat); /* nvmestat */
 	vport->debug_nvmestat = NULL;
 
+	debugfs_remove(vport->debug_scsistat); /* scsistat */
+	vport->debug_scsistat = NULL;
+
 	debugfs_remove(vport->debug_nvmektime); /* nvmektime */
 	vport->debug_nvmektime = NULL;
 
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index c904fa74dfbc..2360ec8fb848 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -50,6 +50,9 @@
 #define LPFC_CPUCHECK_SIZE 8192
 #define LPFC_NVMEIO_TRC_SIZE 8192
 
+/* scsistat output buffer size */
+#define LPFC_SCSISTAT_SIZE 8192
+
 #define LPFC_DEBUG_OUT_LINE_SZ	80
 
 /*
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index ed0318b9003e..a15c3aa569b5 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1282,7 +1282,7 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 	struct lpfc_register reg_data;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	void __iomem *eqdreg = phba->sli4_hba.u.if_type2.EQDregaddr;
 
 	vports = lpfc_create_vport_work_array(phba);
@@ -1324,16 +1324,13 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
 				tot = 0;
 				for (i = 0;
 					i < phba->cfg_hdw_queue; i++) {
-					cstat = &lport->cstat[i];
-					data1 = atomic_read(
-						&cstat->fc4NvmeInputRequests);
-					data2 = atomic_read(
-						&cstat->fc4NvmeOutputRequests);
-					data3 = atomic_read(
-						&cstat->fc4NvmeControlRequests);
+					cstat =
+					     &phba->sli4_hba.hdwq[i].nvme_cstat;
+					data1 = cstat->input_requests;
+					data2 = cstat->output_requests;
+					data3 = cstat->control_requests;
 					tot += (data1 + data2 + data3);
-					tot -= atomic_read(
-						&cstat->fc4NvmeIoCmpls);
+					tot -= cstat->io_cmpls;
 				}
 			}
 		}
@@ -7221,10 +7218,6 @@ lpfc_create_shost(struct lpfc_hba *phba)
 	phba->fc_arbtov = FF_DEF_ARBTOV;
 
 	atomic_set(&phba->sdev_cnt, 0);
-	atomic_set(&phba->fc4ScsiInputRequests, 0);
-	atomic_set(&phba->fc4ScsiOutputRequests, 0);
-	atomic_set(&phba->fc4ScsiControlRequests, 0);
-	atomic_set(&phba->fc4ScsiIoCmpls, 0);
 	vport = lpfc_create_port(phba, phba->brd_no, &phba->pcidev->dev);
 	if (!vport)
 		return -ENODEV;
@@ -8776,6 +8769,25 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
 			phba->sli4_hba.nvmet_mrq_data[idx] = qdesc;
 		}
 	}
+
+#if defined(BUILD_NVME)
+	/* Clear NVME stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].nvme_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
+		}
+	}
+#endif
+
+	/* Clear SCSI stats */
+	if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
+		for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
+			memset(&phba->sli4_hba.hdwq[idx].scsi_cstat, 0,
+			       sizeof(phba->sli4_hba.hdwq[idx].scsi_cstat));
+		}
+	}
+
 	return 0;
 
 out_error:
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index fe0190b48abd..c13638a3c0e7 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -964,7 +964,6 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_nvme_fcpreq_priv *freqpriv;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
 	uint32_t code, status, idx, cpu;
 	uint16_t cid, sqhd, data;
 	uint32_t *ptr;
@@ -991,17 +990,15 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
 	nCmd = lpfc_ncmd->nvmeCmd;
 	status = bf_get(lpfc_wcqe_c_status, wcqe);
 
+	idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
+	phba->sli4_hba.hdwq[idx].nvme_cstat.io_cmpls++;
+
 	if (vport->localport) {
 		lport = (struct lpfc_nvme_lport *)vport->localport->private;
-		if (lport) {
-			idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
-			cstat = &lport->cstat[idx];
-			atomic_inc(&cstat->fc4NvmeIoCmpls);
-			if (status) {
-				if (bf_get(lpfc_wcqe_c_xb, wcqe))
-					atomic_inc(&lport->cmpl_fcp_xb);
-				atomic_inc(&lport->cmpl_fcp_err);
-			}
+		if (lport && status) {
+			if (bf_get(lpfc_wcqe_c_xb, wcqe))
+				atomic_inc(&lport->cmpl_fcp_xb);
+			atomic_inc(&lport->cmpl_fcp_err);
 		}
 	}
 
@@ -1186,7 +1183,7 @@ static int
 lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 		      struct lpfc_nvme_buf *lpfc_ncmd,
 		      struct lpfc_nodelist *pnode,
-		      struct lpfc_nvme_ctrl_stat *cstat)
+		      struct lpfc_fc4_ctrl_stat *cstat)
 {
 	struct lpfc_hba *phba = vport->phba;
 	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
@@ -1224,7 +1221,7 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			} else {
 				wqe->fcp_iwrite.initial_xfer_len = 0;
 			}
-			atomic_inc(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests++;
 		} else {
 			/* From the iread template, initialize words 7 - 11 */
 			memcpy(&wqe->words[7],
@@ -1237,13 +1234,13 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
 			/* Word 5 */
 			wqe->fcp_iread.rsrvd5 = 0;
 
-			atomic_inc(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests++;
 		}
 	} else {
 		/* From the icmnd template, initialize words 4 - 11 */
 		memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4],
 		       sizeof(uint32_t) * 8);
-		atomic_inc(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests++;
 	}
 	/*
 	 * Finish initializing those WQE fields that are independent
@@ -1427,7 +1424,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	int expedite = 0;
 	int idx, cpu;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
+	struct lpfc_fc4_ctrl_stat *cstat;
 	struct lpfc_vport *vport;
 	struct lpfc_hba *phba;
 	struct lpfc_nodelist *ndlp;
@@ -1590,7 +1587,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 	 */
 	idx = lpfc_queue_info->index;
 	lpfc_ncmd->cur_iocbq.hba_wqidx = idx;
-	cstat = &lport->cstat[idx];
+	cstat = &phba->sli4_hba.hdwq[idx].nvme_cstat;
 
 	lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp, cstat);
 	ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd);
@@ -1643,11 +1640,11 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
  out_free_nvme_buf:
 	if (lpfc_ncmd->nvmeCmd->sg_cnt) {
 		if (lpfc_ncmd->nvmeCmd->io_dir == NVMEFC_FCP_WRITE)
-			atomic_dec(&cstat->fc4NvmeOutputRequests);
+			cstat->output_requests--;
 		else
-			atomic_dec(&cstat->fc4NvmeInputRequests);
+			cstat->input_requests--;
 	} else
-		atomic_dec(&cstat->fc4NvmeControlRequests);
+		cstat->control_requests--;
 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
  out_fail:
 	return ret;
@@ -2079,8 +2076,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	struct nvme_fc_port_info nfcp_info;
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
-	int i;
 
 	/* Initialize this localport instance.  The vport wwn usage ensures
 	 * that NPIV is accounted for.
@@ -2097,11 +2092,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 	lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
 	lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
 
-	cstat = kmalloc((sizeof(struct lpfc_nvme_ctrl_stat) *
-			phba->cfg_hdw_queue), GFP_KERNEL);
-	if (!cstat)
-		return -ENOMEM;
-
 	/* localport is allocated from the stack, but the registration
 	 * call allocates heap memory as well as the private area.
 	 */
@@ -2124,7 +2114,6 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		lport = (struct lpfc_nvme_lport *)localport->private;
 		vport->localport = localport;
 		lport->vport = vport;
-		lport->cstat = cstat;
 		vport->nvmei_support = 1;
 
 		atomic_set(&lport->xmt_fcp_noxri, 0);
@@ -2141,16 +2130,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 		atomic_set(&lport->cmpl_ls_err, 0);
 		atomic_set(&lport->fc4NvmeLsRequests, 0);
 		atomic_set(&lport->fc4NvmeLsCmpls, 0);
-
-		for (i = 0; i < phba->cfg_hdw_queue; i++) {
-			cstat = &lport->cstat[i];
-			atomic_set(&cstat->fc4NvmeInputRequests, 0);
-			atomic_set(&cstat->fc4NvmeOutputRequests, 0);
-			atomic_set(&cstat->fc4NvmeControlRequests, 0);
-			atomic_set(&cstat->fc4NvmeIoCmpls, 0);
-		}
-	} else
-		kfree(cstat);
+	}
 
 	return ret;
 }
@@ -2212,8 +2192,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 #if (IS_ENABLED(CONFIG_NVME_FC))
 	struct nvme_fc_local_port *localport;
 	struct lpfc_nvme_lport *lport;
-	struct lpfc_nvme_ctrl_stat *cstat;
 	int ret;
 	DECLARE_COMPLETION_ONSTACK(lport_unreg_cmp);
 
 	if (vport->nvmei_support == 0)
@@ -2221,7 +2200,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 
 	localport = vport->localport;
 	lport = (struct lpfc_nvme_lport *)localport->private;
-	cstat = lport->cstat;
 
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
 			 "6011 Destroying NVME localport %p\n",
@@ -2237,7 +2215,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 	 */
 	lpfc_nvme_lport_unreg_wait(vport, lport, &lport_unreg_cmp);
 	vport->localport = NULL;
-	kfree(cstat);
 
 	/* Regardless of the unregister upcall response, clear
 	 * nvmei_support.  All rports are unregistered and the
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 4a020b9c8fbf..974fbec7dd04 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -30,6 +30,9 @@
 #define LPFC_NVME_FB_SHIFT		9
 #define LPFC_NVME_MAX_FB		(1 << 20)	/* 1M */
 
+#define LPFC_MAX_NVME_INFO_TMP_LEN	100
+#define LPFC_NVME_INFO_MORE_STR		"\nCould be more info...\n"
+
 #define lpfc_ndlp_get_nrport(ndlp)					\
 	((!ndlp->nrport || (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG))	\
 	? NULL : ndlp->nrport)
@@ -40,19 +43,11 @@ struct lpfc_nvme_qhandle {
 	uint32_t cpu_id;	/* current cpu id at time of create */
 };
 
-struct lpfc_nvme_ctrl_stat {
-	atomic_t fc4NvmeInputRequests;
-	atomic_t fc4NvmeOutputRequests;
-	atomic_t fc4NvmeControlRequests;
-	atomic_t fc4NvmeIoCmpls;
-};
-
 /* Declare nvme-based local and remote port definitions. */
 struct lpfc_nvme_lport {
 	struct lpfc_vport *vport;
 	struct completion *lport_unreg_cmp;
 	/* Add stats counters here */
-	struct lpfc_nvme_ctrl_stat *cstat;
 	atomic_t fc4NvmeLsRequests;
 	atomic_t fc4NvmeLsCmpls;
 	atomic_t xmt_fcp_noxri;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 261d0ebf0093..08f284ca5f1e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3722,14 +3722,18 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn,
 	unsigned long flags;
 	struct lpfc_fast_path_event *fast_path_evt;
 	struct Scsi_Host *shost;
+	int idx;
 	uint32_t logit = LOG_FCP;
 
-	atomic_inc(&phba->fc4ScsiIoCmpls);
-
 	/* Sanity check on return of outstanding command */
 	cmd = lpfc_cmd->pCmd;
 	if (!cmd)
 		return;
+
+	idx = lpfc_cmd->cur_iocbq.hba_wqidx;
+	if (phba->sli4_hba.hdwq)
+		phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++;
+
 	shost = cmd->device->host;
 
 	lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK);
@@ -3986,7 +3990,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
 	struct lpfc_iocbq *piocbq = &(lpfc_cmd->cur_iocbq);
+	struct lpfc_sli4_hdw_queue *hdwq = NULL;
 	int datadir = scsi_cmnd->sc_data_direction;
+	int idx;
 	uint8_t *ptr;
 	bool sli4;
 	uint32_t fcpdl;
@@ -4012,6 +4018,9 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 
 	sli4 = (phba->sli_rev == LPFC_SLI_REV4);
 	piocbq->iocb.un.fcpi.fcpi_XRdy = 0;
+	idx = lpfc_cmd->hdwq;
+	if (phba->sli4_hba.hdwq)
+		hdwq = &phba->sli4_hba.hdwq[idx];
 
 	/*
 	 * There are three possibilities here - use scatter-gather segment, use
@@ -4033,19 +4042,22 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 						vport->cfg_first_burst_size;
 			}
 			fcp_cmnd->fcpCntl3 = WRITE_DATA;
-			atomic_inc(&phba->fc4ScsiOutputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.output_requests++;
 		} else {
 			iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
 			iocb_cmd->ulpPU = PARM_READ_CHECK;
 			fcp_cmnd->fcpCntl3 = READ_DATA;
-			atomic_inc(&phba->fc4ScsiInputRequests);
+			if (hdwq)
+				hdwq->scsi_cstat.input_requests++;
 		}
 	} else {
 		iocb_cmd->ulpCommand = CMD_FCP_ICMND64_CR;
 		iocb_cmd->un.fcpi.fcpi_parm = 0;
 		iocb_cmd->ulpPU = 0;
 		fcp_cmnd->fcpCntl3 = 0;
-		atomic_inc(&phba->fc4ScsiControlRequests);
+		if (hdwq)
+			hdwq->scsi_cstat.control_requests++;
 	}
 	if (phba->sli_rev == 3 &&
 	    !(phba->sli3_options & LPFC_SLI3_BG_ENABLED))
@@ -4397,7 +4409,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	struct lpfc_nodelist *ndlp;
 	struct lpfc_scsi_buf *lpfc_cmd;
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
-	int err;
+	int err, idx;
 
 	rdata = lpfc_rport_data_from_scsi_device(cmnd->device);
 
@@ -4532,16 +4544,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 				 (uint32_t)
 				 (cmnd->request->timeout / 1000));
 
-		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
-		case WRITE_DATA:
-			atomic_dec(&phba->fc4ScsiOutputRequests);
-			break;
-		case READ_DATA:
-			atomic_dec(&phba->fc4ScsiInputRequests);
-			break;
-		default:
-			atomic_dec(&phba->fc4ScsiControlRequests);
-		}
 		goto out_host_busy_free_buf;
 	}
 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
@@ -4555,7 +4557,20 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	return 0;
 
  out_host_busy_free_buf:
+	idx = lpfc_cmd->hdwq;
 	lpfc_scsi_unprep_dma_buf(phba, lpfc_cmd);
+	if (phba->sli4_hba.hdwq) {
+		switch (lpfc_cmd->fcp_cmnd->fcpCntl3) {
+		case WRITE_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.output_requests--;
+			break;
+		case READ_DATA:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.input_requests--;
+			break;
+		default:
+			phba->sli4_hba.hdwq[idx].scsi_cstat.control_requests--;
+		}
+	}
 	lpfc_release_scsi_buf(phba, lpfc_cmd);
  out_host_busy:
 	return SCSI_MLQUEUE_HOST_BUSY;
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index ebb5d5860cdb..68e6c4ad3936 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -196,5 +196,8 @@ struct lpfc_scsi_buf {
 
 #define TXRDY_PAYLOAD_LEN	12
 
+/* For sysfs/debugfs tmp string max len */
+#define LPFC_MAX_SCSI_INFO_TMP_LEN	79
+
 int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
 				  struct lpfc_scsi_buf *lpfc_cmd);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 8e3e99d52f75..a33fb6715329 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -533,6 +533,13 @@ struct lpfc_vector_map_info {
 };
 #define LPFC_VECTOR_MAP_EMPTY	0xffff
 
+struct lpfc_fc4_ctrl_stat {
+	u32 input_requests;
+	u32 output_requests;
+	u32 control_requests;
+	u32 io_cmpls;
+};
+
 /* SLI4 HBA data structure entries */
 struct lpfc_sli4_hdw_queue {
 	/* Pointers to the constructed SLI4 queues */
@@ -560,6 +567,10 @@ struct lpfc_sli4_hdw_queue {
 	uint32_t abts_scsi_io_bufs;
 	uint32_t abts_nvme_io_bufs;
 
+	/* FC-4 Stats counters */
+	struct lpfc_fc4_ctrl_stat nvme_cstat;
+	struct lpfc_fc4_ctrl_stat scsi_cstat;
+
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
 #define LPFC_CHECK_CPU_CNT    128
 	uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT];
-- 
2.16.4