Blob Blame History Raw
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 22 Feb 2018 08:18:48 -0800
Subject: scsi: lpfc: Add embedded data pointers for enhanced performance
Patch-mainline: v4.17-rc1
Git-commit: 0bc2b7c5317bd51df571e9d1131547901215f6c9
References: bsc#1082595

The current driver isn't taking advantage of a performance hint whereby
the initial data buffer descriptor can be placed in the WQE as well as
the SGL.

Add the logic to detect support for the feature and to use it when
supported.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h       |    2 ++
 drivers/scsi/lpfc/lpfc_hw4.h   |    3 +++
 drivers/scsi/lpfc/lpfc_init.c  |   20 ++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_nvme.c  |   18 ++++++++++++++++++
 drivers/scsi/lpfc/lpfc_nvmet.c |   24 ++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_scsi.c  |    8 ++++++--
 drivers/scsi/lpfc/lpfc_sli.c   |   25 +++++++++++++++++++++----
 7 files changed, 94 insertions(+), 6 deletions(-)

--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -840,6 +840,8 @@ struct lpfc_hba {
 #define LPFC_ENABLE_FCP  1
 #define LPFC_ENABLE_NVME 2
 #define LPFC_ENABLE_BOTH 3
+	uint32_t nvme_embed_pbde;
+	uint32_t fcp_embed_pbde;
 	uint32_t io_channel_irqs;	/* number of irqs for io channels */
 	struct nvmet_fc_target_port *targetport;
 	lpfc_vpd_t vpd;		/* vital product data */
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -4226,6 +4226,9 @@ struct wqe_common {
 #define wqe_irsp_SHIFT        4
 #define wqe_irsp_MASK         0x00000001
 #define wqe_irsp_WORD         word11
+#define wqe_pbde_SHIFT        5
+#define wqe_pbde_MASK         0x00000001
+#define wqe_pbde_WORD         word11
 #define wqe_sup_SHIFT         6
 #define wqe_sup_MASK          0x00000001
 #define wqe_sup_WORD          word11
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10615,6 +10615,19 @@ lpfc_get_sli4_parameters(struct lpfc_hba
 		phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP;
 	}
 
+	/* Only embed PBDE for if_type 6 */
+	if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) ==
+	    LPFC_SLI_INTF_IF_TYPE_6) {
+		phba->fcp_embed_pbde = 1;
+		phba->nvme_embed_pbde = 1;
+	}
+
+	/* PBDE support requires xib be set */
+	if (!bf_get(cfg_xib, mbx_sli4_parameters)) {
+		phba->fcp_embed_pbde = 0;
+		phba->nvme_embed_pbde = 0;
+	}
+
 	/*
 	 * To support Suppress Response feature we must satisfy 3 conditions.
 	 * lpfc_suppress_rsp module parameter must be set (default).
@@ -10646,6 +10659,13 @@ lpfc_get_sli4_parameters(struct lpfc_hba
 	else
 		phba->fcp_embed_io = 0;
 
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME,
+			"6422 XIB %d: FCP %d %d NVME %d %d %d\n",
+			bf_get(cfg_xib, mbx_sli4_parameters),
+			phba->fcp_embed_pbde, phba->fcp_embed_io,
+			phba->nvme_support, phba->nvme_embed_pbde,
+			phba->cfg_suppress_rsp);
+
 	if ((bf_get(cfg_cqpsize, mbx_sli4_parameters) & LPFC_CQ_16K_PAGE_SZ) &&
 	    (bf_get(cfg_wqpsize, mbx_sli4_parameters) & LPFC_WQ_16K_PAGE_SZ) &&
 	    (sli4_params->wqsize & LPFC_WQ_SZ128_SUPPORT))
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1170,6 +1170,7 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport
 	struct sli4_sge *sgl = lpfc_ncmd->nvme_sgl;
 	struct scatterlist *data_sg;
 	struct sli4_sge *first_data_sgl;
+	struct ulp_bde64 *bde;
 	dma_addr_t physaddr;
 	uint32_t num_bde = 0;
 	uint32_t dma_len;
@@ -1237,7 +1238,24 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport
 			data_sg = sg_next(data_sg);
 			sgl++;
 		}
+		if (phba->nvme_embed_pbde) {
+			/* Use PBDE support for first SGL only, offset == 0 */
+			/* Words 13-15 */
+			bde = (struct ulp_bde64 *)
+				&wqe->words[13];
+			bde->addrLow = first_data_sgl->addr_lo;
+			bde->addrHigh = first_data_sgl->addr_hi;
+			bde->tus.f.bdeSize =
+				le32_to_cpu(first_data_sgl->sge_len);
+			bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+			bde->tus.w = cpu_to_le32(bde->tus.w);
+			bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
+		} else
+			bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
+
 	} else {
+		bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
+
 		/* For this clause to be valid, the payload_length
 		 * and sg_cnt must zero.
 		 */
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -2150,9 +2150,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba
 	struct lpfc_iocbq *nvmewqe;
 	struct scatterlist *sgel;
 	union lpfc_wqe128 *wqe;
+	struct ulp_bde64 *bde;
 	uint32_t *txrdy;
 	dma_addr_t physaddr;
 	int i, cnt;
+	int do_pbde;
 	int xc = 1;
 
 	if (!lpfc_is_link_up(phba)) {
@@ -2243,6 +2245,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba
 		/* Word 7 */
 		bf_set(wqe_pu, &wqe->fcp_tsend.wqe_com, 1);
 		bf_set(wqe_cmnd, &wqe->fcp_tsend.wqe_com, CMD_FCP_TSEND64_WQE);
+		do_pbde = 0;
 
 		/* Word 8 */
 		wqe->fcp_tsend.wqe_com.abort_tag = nvmewqe->iotag;
@@ -2355,6 +2358,10 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba
 		bf_set(wqe_ar, &wqe->fcp_treceive.wqe_com, 0);
 		bf_set(wqe_cmnd, &wqe->fcp_treceive.wqe_com,
 		       CMD_FCP_TRECEIVE64_WQE);
+		if (phba->nvme_embed_pbde)
+			do_pbde = 1;
+		else
+			do_pbde = 0;
 
 		/* Word 8 */
 		wqe->fcp_treceive.wqe_com.abort_tag = nvmewqe->iotag;
@@ -2438,6 +2445,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba
 		bf_set(wqe_pu, &wqe->fcp_trsp.wqe_com, 0);
 		bf_set(wqe_ag, &wqe->fcp_trsp.wqe_com, 1);
 		bf_set(wqe_cmnd, &wqe->fcp_trsp.wqe_com, CMD_FCP_TRSP64_WQE);
+		do_pbde = 0;
 
 		/* Word 8 */
 		wqe->fcp_trsp.wqe_com.abort_tag = nvmewqe->iotag;
@@ -2508,9 +2516,25 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba
 			bf_set(lpfc_sli4_sge_last, sgl, 1);
 		sgl->word2 = cpu_to_le32(sgl->word2);
 		sgl->sge_len = cpu_to_le32(cnt);
+		if (do_pbde && i == 0) {
+			bde = (struct ulp_bde64 *)&wqe->words[13];
+			memset(bde, 0, sizeof(struct ulp_bde64));
+			/* Words 13-15  (PBDE)*/
+			bde->addrLow = sgl->addr_lo;
+			bde->addrHigh = sgl->addr_hi;
+			bde->tus.f.bdeSize =
+				le32_to_cpu(sgl->sge_len);
+			bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
+			bde->tus.w = cpu_to_le32(bde->tus.w);
+		}
 		sgl++;
 		ctxp->offset += cnt;
 	}
+
+	if (do_pbde)
+		bf_set(wqe_pbde, &wqe->generic.wqe_com, 1);
+	else
+		bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
 	ctxp->state = LPFC_NVMET_STE_DATA;
 	ctxp->entry_cnt++;
 	return nvmewqe;
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -3304,8 +3304,12 @@ lpfc_scsi_prep_dma_buf_s4(struct lpfc_hb
 			dma_offset += dma_len;
 			sgl++;
 		}
-		/* setup the performance hint (first data BDE) if enabled */
-		if (phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) {
+		/*
+		 * Setup the first Payload BDE. For FCoE we just key off
+		 * Performance Hints, for FC we utilize fcp_embed_pbde.
+		 */
+		if ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) ||
+		    phba->fcp_embed_pbde) {
 			bde = (struct ulp_bde64 *)
 					&(iocb_cmd->unsli3.sli3Words[5]);
 			bde->addrLow = first_data_sgl->addr_lo;
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -6958,10 +6958,15 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phb
 				"0378 No support for fcpi mode.\n");
 		ftr_rsp++;
 	}
-	if (bf_get(lpfc_mbx_rq_ftr_rsp_perfh, &mqe->un.req_ftrs))
-		phba->sli3_options |= LPFC_SLI4_PERFH_ENABLED;
-	else
-		phba->sli3_options &= ~LPFC_SLI4_PERFH_ENABLED;
+
+	/* Performance Hints are ONLY for FCoE */
+	if (phba->hba_flag & HBA_FCOE_MODE) {
+		if (bf_get(lpfc_mbx_rq_ftr_rsp_perfh, &mqe->un.req_ftrs))
+			phba->sli3_options |= LPFC_SLI4_PERFH_ENABLED;
+		else
+			phba->sli3_options &= ~LPFC_SLI4_PERFH_ENABLED;
+	}
+
 	/*
 	 * If the port cannot support the host's requested features
 	 * then turn off the global config parameters to disable the
@@ -9063,6 +9068,12 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba
 		}
 		/* Note, word 10 is already initialized to 0 */
 
+		/* Don't set PBDE for Perf hints, just fcp_embed_pbde */
+		if (phba->fcp_embed_pbde)
+			bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 1);
+		else
+			bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 0);
+
 		if (phba->fcp_embed_io) {
 			struct lpfc_scsi_buf *lpfc_cmd;
 			struct sli4_sge *sgl;
@@ -9122,6 +9133,12 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba
 		}
 		/* Note, word 10 is already initialized to 0 */
 
+		/* Don't set PBDE for Perf hints, just fcp_embed_pbde */
+		if (phba->fcp_embed_pbde)
+			bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 1);
+		else
+			bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 0);
+
 		if (phba->fcp_embed_io) {
 			struct lpfc_scsi_buf *lpfc_cmd;
 			struct sli4_sge *sgl;