Blob Blame History Raw
From: Sunil Goutham <sgoutham@marvell.com>
Date: Fri, 30 Jul 2021 17:19:14 +0530
Subject: octeontx2-pf: cn10k: Config DWRR weight based on MTU
Patch-mainline: v5.15-rc1
Git-commit: c39830a4ce4df09ddef29ac388b062537c559228
References: jsc#SLE-24682

Program SQ, MDQ, TL4 to TL2 transmit scheduler queues' DWRR
weight based on DWRR MTU programmed at NIX_AF_DWRR_RPM_MTU.
The DWRR MTU from admin function is retrieved via mbox.

On OcteaonTx2 silicon, admin function driver responds with DWRR
MTU as '1'. This helps to avoid silicon specific transmit
scheduler DWRR quantum/weight configuration logic.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/marvell/octeontx2/af/common.h       |    4 --
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h         |    4 ++
 drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c      |   15 +++++++++
 drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c       |    3 -
 drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h       |   14 +++++++++
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c |   23 +++++++++++----
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h |    1 
 7 files changed, 52 insertions(+), 12 deletions(-)

--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -146,10 +146,6 @@ enum nix_scheduler {
 #define TXSCH_RR_QTM_MAX		((1 << 24) - 1)
 #define TXSCH_TL1_DFLT_RR_QTM		TXSCH_RR_QTM_MAX
 #define TXSCH_TL1_DFLT_RR_PRIO		(0x1ull)
-#define MAX_SCHED_WEIGHT		0xFF
-#define DFLT_RR_WEIGHT			71
-#define DFLT_RR_QTM	((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
-			 / MAX_SCHED_WEIGHT)
 #define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */
 
 /* Min/Max packet sizes, excluding FCS */
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1032,8 +1032,12 @@ struct nix_bp_cfg_rsp {
 
 struct nix_hw_info {
 	struct mbox_msghdr hdr;
+	u16 rsvs16;
 	u16 max_mtu;
 	u16 min_mtu;
+	u32 rpm_dwrr_mtu;
+	u32 sdp_dwrr_mtu;
+	u64 rsvd[16]; /* Add reserved fields for future expansion */
 };
 
 struct nix_bandprof_alloc_req {
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -2802,6 +2802,7 @@ int rvu_mbox_handler_nix_get_hw_info(str
 				     struct nix_hw_info *rsp)
 {
 	u16 pcifunc = req->hdr.pcifunc;
+	u64 dwrr_mtu;
 	int blkaddr;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -2814,6 +2815,20 @@ int rvu_mbox_handler_nix_get_hw_info(str
 		rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
 
 	rsp->min_mtu = NIC_HW_MIN_FRS;
+
+	if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+		/* Return '1' on OTx2 */
+		rsp->rpm_dwrr_mtu = 1;
+		rsp->sdp_dwrr_mtu = 1;
+		return 0;
+	}
+
+	dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+	rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+	dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+	rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
 	return 0;
 }
 
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -92,8 +92,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx
 	aq->sq.ena = 1;
 	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
 	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-	/* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-	aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+	aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 	aq->sq.default_chan = pfvf->hw.tx_chan_base;
 	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
 	aq->sq.sqb_aura = sqb_aura;
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
@@ -9,6 +9,20 @@
 
 #include "otx2_common.h"
 
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+	u32 weight;
+
+	/* On OTx2, since AF returns DWRR_MTU as '1', this logic
+	 * will work on those silicons as well.
+	 */
+	weight = mtu / pfvf->hw.dwrr_mtu;
+	if (mtu % pfvf->hw.dwrr_mtu)
+		weight += 1;
+
+	return weight;
+}
+
 void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -596,6 +596,9 @@ int otx2_txschq_config(struct otx2_nic *
 	struct otx2_hw *hw = &pfvf->hw;
 	struct nix_txschq_config *req;
 	u64 schq, parent;
+	u64 dwrr_val;
+
+	dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 
 	req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
 	if (!req)
@@ -621,21 +624,21 @@ int otx2_txschq_config(struct otx2_nic *
 		req->num_regs++;
 		/* Set DWRR quantum */
 		req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
-		req->regval[2] =  DFLT_RR_QTM;
+		req->regval[2] =  dwrr_val;
 	} else if (lvl == NIX_TXSCH_LVL_TL4) {
 		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
 		req->reg[0] = NIX_AF_TL4X_PARENT(schq);
 		req->regval[0] = parent << 16;
 		req->num_regs++;
 		req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
-		req->regval[1] = DFLT_RR_QTM;
+		req->regval[1] = dwrr_val;
 	} else if (lvl == NIX_TXSCH_LVL_TL3) {
 		parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
 		req->reg[0] = NIX_AF_TL3X_PARENT(schq);
 		req->regval[0] = parent << 16;
 		req->num_regs++;
 		req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
-		req->regval[1] = DFLT_RR_QTM;
+		req->regval[1] = dwrr_val;
 	} else if (lvl == NIX_TXSCH_LVL_TL2) {
 		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
 		req->reg[0] = NIX_AF_TL2X_PARENT(schq);
@@ -643,7 +646,7 @@ int otx2_txschq_config(struct otx2_nic *
 
 		req->num_regs++;
 		req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
-		req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+		req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
 
 		req->num_regs++;
 		req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
@@ -656,7 +659,10 @@ int otx2_txschq_config(struct otx2_nic *
 		 * For VF this is always ignored.
 		 */
 
-		/* Set DWRR quantum */
+		/* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+		 * clip it to 16384, so configuring a 24bit max value
+		 * will work on both OTx2 and CN10K.
+		 */
 		req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
 		req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
 
@@ -803,7 +809,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx,
 	aq->sq.ena = 1;
 	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
 	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-	aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+	aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 	aq->sq.default_chan = pfvf->hw.tx_chan_base;
 	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
 	aq->sq.sqb_aura = sqb_aura;
@@ -1668,6 +1674,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pf
 		 * SMQ errors
 		 */
 		max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+		/* Also save DWRR MTU, needed for DWRR weight calculation */
+		pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+		if (!pfvf->hw.dwrr_mtu)
+			pfvf->hw.dwrr_mtu = 1;
 	}
 
 out:
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -181,6 +181,7 @@ struct otx2_hw {
 	/* NIX */
 	u16		txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
 	u16			matchall_ipolicer;
+	u32			dwrr_mtu;
 
 	/* HW settings, coalescing etc */
 	u16			rx_chan_base;