Blob Blame History Raw
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Wed, 15 Sep 2021 05:32:32 -0700
Subject: RDMA/bnxt_re: Add extended statistics counters
Patch-mainline: v5.16-rc1
Git-commit: 9a381f7e5aa299de3500b8afa2237e5d1eab63fb
References: jsc#SLE-18977

Implement extended statistics counters for newer adapters. Check whether the
FW supports this command and issue the FW command only if it is
supported. Includes code re-organization to handle extended stats. Also,
add AH and PD software counters.

Link: https://lore.kernel.org/r/1631709163-2287-2-git-send-email-selvin.xavier@broadcom.com
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h     |    5 
 drivers/infiniband/hw/bnxt_re/hw_counters.c |  273 ++++++++++++++++++----------
 drivers/infiniband/hw/bnxt_re/hw_counters.h |   28 ++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c    |   16 +
 drivers/infiniband/hw/bnxt_re/main.c        |    4 
 drivers/infiniband/hw/bnxt_re/qplib_fp.c    |    3 
 drivers/infiniband/hw/bnxt_re/qplib_res.h   |    9 
 drivers/infiniband/hw/bnxt_re/qplib_sp.c    |   51 +++++
 drivers/infiniband/hw/bnxt_re/qplib_sp.h    |   28 ++
 drivers/infiniband/hw/bnxt_re/roce_hsi.h    |   85 ++++++++
 10 files changed, 401 insertions(+), 101 deletions(-)

--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,6 +39,7 @@
 
 #ifndef __BNXT_RE_H__
 #define __BNXT_RE_H__
+#include "hw_counters.h"
 #define ROCE_DRV_MODULE_NAME		"bnxt_re"
 
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
@@ -177,15 +178,17 @@ struct bnxt_re_dev {
 	atomic_t			srq_count;
 	atomic_t			mr_count;
 	atomic_t			mw_count;
+	atomic_t			ah_count;
+	atomic_t			pd_count;
 	/* Max of 2 lossless traffic class supported per port */
 	u16				cosq[2];
 
 	/* QP for for handling QP1 packets */
 	struct bnxt_re_gsi_context	gsi_ctx;
+	struct bnxt_re_stats		stats;
 	atomic_t nq_alloc_cnt;
 	u32 is_virtfn;
 	u32 num_vfs;
-	struct bnxt_qplib_roce_stats	stats;
 };
 
 #define to_bnxt_re_dev(ptr, member)	\
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -58,6 +58,8 @@
 #include "hw_counters.h"
 
 static const char * const bnxt_re_stat_name[] = {
+	[BNXT_RE_ACTIVE_PD]		=  "active_pds",
+	[BNXT_RE_ACTIVE_AH]		=  "active_ahs",
 	[BNXT_RE_ACTIVE_QP]		=  "active_qps",
 	[BNXT_RE_ACTIVE_SRQ]		=  "active_srqs",
 	[BNXT_RE_ACTIVE_CQ]		=  "active_cqs",
@@ -109,17 +111,154 @@ static const char * const bnxt_re_stat_n
 	[BNXT_RE_RES_SRQ_LOAD_ERR]      = "res_srq_load_err",
 	[BNXT_RE_RES_TX_PCI_ERR]        = "res_tx_pci_err",
 	[BNXT_RE_RES_RX_PCI_ERR]        = "res_rx_pci_err",
-	[BNXT_RE_OUT_OF_SEQ_ERR]        = "oos_drop_count"
+	[BNXT_RE_OUT_OF_SEQ_ERR]        = "oos_drop_count",
+	[BNXT_RE_TX_ATOMIC_REQ]		= "tx_atomic_req",
+	[BNXT_RE_TX_READ_REQ]		= "tx_read_req",
+	[BNXT_RE_TX_READ_RES]		= "tx_read_resp",
+	[BNXT_RE_TX_WRITE_REQ]		= "tx_write_req",
+	[BNXT_RE_TX_SEND_REQ]		= "tx_send_req",
+	[BNXT_RE_RX_ATOMIC_REQ]		= "rx_atomic_req",
+	[BNXT_RE_RX_READ_REQ]		= "rx_read_req",
+	[BNXT_RE_RX_READ_RESP]		= "rx_read_resp",
+	[BNXT_RE_RX_WRITE_REQ]		= "rx_write_req",
+	[BNXT_RE_RX_SEND_REQ]		= "rx_send_req",
+	[BNXT_RE_RX_ROCE_GOOD_PKTS]	= "rx_roce_good_pkts",
+	[BNXT_RE_RX_ROCE_GOOD_BYTES]	= "rx_roce_good_bytes",
+	[BNXT_RE_OOB]			= "rx_out_of_buffer"
 };
 
+static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
+				   struct rdma_hw_stats *stats,
+				   struct bnxt_qplib_ext_stat *s)
+{ /* Publish the extended counters held in @s through stats->value[]; @rdev is not used. */
+	stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req;
+	stats->value[BNXT_RE_TX_READ_REQ]   = s->tx_read_req;
+	stats->value[BNXT_RE_TX_READ_RES]   = s->tx_read_res; /* named "tx_read_resp" in bnxt_re_stat_name[] */
+	stats->value[BNXT_RE_TX_WRITE_REQ]  = s->tx_write_req;
+	stats->value[BNXT_RE_TX_SEND_REQ]   = s->tx_send_req;
+	stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req;
+	stats->value[BNXT_RE_RX_READ_REQ]   = s->rx_read_req;
+	stats->value[BNXT_RE_RX_READ_RESP]  = s->rx_read_res;
+	stats->value[BNXT_RE_RX_WRITE_REQ]  = s->rx_write_req;
+	stats->value[BNXT_RE_RX_SEND_REQ]   = s->rx_send_req;
+	stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts;
+	stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes;
+	stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; /* named "rx_out_of_buffer"; s->rx_out_of_sequence is not exported here */
+}
+
+static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev,
+				struct rdma_hw_stats *stats)
+{ /* Refresh rdev->stats.rstat.ext_stat via bnxt_qplib_qext_stat() and copy it into @stats; returns that call's result. */
+	struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+	u32 fid;
+	int rc;
+
+	fid = PCI_FUNC(rdev->en_dev->pdev->devfn); /* function number taken from the en_dev PCI device */
+	rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+	if (rc)
+		goto done; /* query failed: @stats is left untouched */
+	bnxt_re_copy_ext_stats(rdev, stats, estat);
+
+done:
+	return rc;
+}
+
+static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
+				   struct rdma_hw_stats *stats,
+				   struct bnxt_qplib_roce_stats *err_s)
+{ /* Copy each counter of the snapshot @err_s into its stats->value[] slot; @rdev is not used. */
+	stats->value[BNXT_RE_TO_RETRANSMITS] =
+				err_s->to_retransmits;
+	stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
+				err_s->seq_err_naks_rcvd;
+	stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
+				err_s->max_retry_exceeded;
+	stats->value[BNXT_RE_RNR_NAKS_RCVD] =
+				err_s->rnr_naks_rcvd;
+	stats->value[BNXT_RE_MISSING_RESP] =
+				err_s->missing_resp;
+	stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
+				err_s->unrecoverable_err;
+	stats->value[BNXT_RE_BAD_RESP_ERR] =
+				err_s->bad_resp_err;
+	stats->value[BNXT_RE_LOCAL_QP_OP_ERR]	=
+			err_s->local_qp_op_err;
+	stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
+			err_s->local_protection_err;
+	stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
+			err_s->mem_mgmt_op_err;
+	stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
+			err_s->remote_invalid_req_err;
+	stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
+			err_s->remote_access_err;
+	stats->value[BNXT_RE_REMOTE_OP_ERR] =
+			err_s->remote_op_err;
+	stats->value[BNXT_RE_DUP_REQ] =
+			err_s->dup_req;
+	stats->value[BNXT_RE_RES_EXCEED_MAX] =
+			err_s->res_exceed_max;
+	stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
+			err_s->res_length_mismatch;
+	stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
+			err_s->res_exceeds_wqe;
+	stats->value[BNXT_RE_RES_OPCODE_ERR] =
+			err_s->res_opcode_err;
+	stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
+			err_s->res_rx_invalid_rkey;
+	stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
+			err_s->res_rx_domain_err;
+	stats->value[BNXT_RE_RES_RX_NO_PERM] =
+			err_s->res_rx_no_perm;
+	stats->value[BNXT_RE_RES_RX_RANGE_ERR]  =
+			err_s->res_rx_range_err;
+	stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
+			err_s->res_tx_invalid_rkey;
+	stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
+			err_s->res_tx_domain_err;
+	stats->value[BNXT_RE_RES_TX_NO_PERM] =
+			err_s->res_tx_no_perm;
+	stats->value[BNXT_RE_RES_TX_RANGE_ERR]  =
+			err_s->res_tx_range_err;
+	stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
+			err_s->res_irrq_oflow;
+	stats->value[BNXT_RE_RES_UNSUP_OPCODE]  =
+			err_s->res_unsup_opcode;
+	stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
+			err_s->res_unaligned_atomic;
+	stats->value[BNXT_RE_RES_REM_INV_ERR]   =
+			err_s->res_rem_inv_err;
+	stats->value[BNXT_RE_RES_MEM_ERROR] =
+			err_s->res_mem_error;
+	stats->value[BNXT_RE_RES_SRQ_ERR] =
+			err_s->res_srq_err;
+	stats->value[BNXT_RE_RES_CMP_ERR] =
+			err_s->res_cmp_err;
+	stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
+			err_s->res_invalid_dup_rkey;
+	stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
+			err_s->res_wqe_format_err;
+	stats->value[BNXT_RE_RES_CQ_LOAD_ERR]   =
+			err_s->res_cq_load_err;
+	stats->value[BNXT_RE_RES_SRQ_LOAD_ERR]  =
+			err_s->res_srq_load_err;
+	stats->value[BNXT_RE_RES_TX_PCI_ERR]    =
+			err_s->res_tx_pci_err;
+	stats->value[BNXT_RE_RES_RX_PCI_ERR]    =
+			err_s->res_rx_pci_err;
+	stats->value[BNXT_RE_OUT_OF_SEQ_ERR]    =
+			err_s->res_oos_drop_count; /* named "oos_drop_count" in bnxt_re_stat_name[] */
+}
+
 int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
 			    struct rdma_hw_stats *stats,
 			    u32 port, int index)
 {
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
-	struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
+	struct ctx_hw_stats *hw_stats = NULL;
+	struct bnxt_qplib_roce_stats *err_s = NULL;
 	int rc  = 0;
 
+	hw_stats = rdev->qplib_ctx.stats.dma;
 	if (!port || !stats)
 		return -EINVAL;
 
@@ -128,118 +267,62 @@ int bnxt_re_ib_get_hw_stats(struct ib_de
 	stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count);
 	stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count);
 	stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count);
-	if (bnxt_re_stats) {
+	stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count);
+	stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count);
+
+	if (hw_stats) {
 		stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
-			le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
+			le64_to_cpu(hw_stats->tx_bcast_pkts);
 		stats->value[BNXT_RE_RX_DROPS] =
-			le64_to_cpu(bnxt_re_stats->rx_error_pkts);
+			le64_to_cpu(hw_stats->rx_error_pkts);
 		stats->value[BNXT_RE_RX_DISCARDS] =
-			le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
+			le64_to_cpu(hw_stats->rx_discard_pkts);
 		stats->value[BNXT_RE_RX_PKTS] =
-			le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
+			le64_to_cpu(hw_stats->rx_ucast_pkts);
 		stats->value[BNXT_RE_RX_BYTES] =
-			le64_to_cpu(bnxt_re_stats->rx_ucast_bytes);
+			le64_to_cpu(hw_stats->rx_ucast_bytes);
 		stats->value[BNXT_RE_TX_PKTS] =
-			le64_to_cpu(bnxt_re_stats->tx_ucast_pkts);
+			le64_to_cpu(hw_stats->tx_ucast_pkts);
 		stats->value[BNXT_RE_TX_BYTES] =
-			le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
+			le64_to_cpu(hw_stats->tx_ucast_bytes);
 	}
+	err_s = &rdev->stats.rstat.errs;
 	if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
-		rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
-		if (rc)
+		rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s);
+		if (rc) {
 			clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
 				  &rdev->flags);
-		stats->value[BNXT_RE_TO_RETRANSMITS] =
-					rdev->stats.to_retransmits;
-		stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
-					rdev->stats.seq_err_naks_rcvd;
-		stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
-					rdev->stats.max_retry_exceeded;
-		stats->value[BNXT_RE_RNR_NAKS_RCVD] =
-					rdev->stats.rnr_naks_rcvd;
-		stats->value[BNXT_RE_MISSING_RESP] =
-					rdev->stats.missing_resp;
-		stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
-					rdev->stats.unrecoverable_err;
-		stats->value[BNXT_RE_BAD_RESP_ERR] =
-					rdev->stats.bad_resp_err;
-		stats->value[BNXT_RE_LOCAL_QP_OP_ERR]	=
-				rdev->stats.local_qp_op_err;
-		stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
-				rdev->stats.local_protection_err;
-		stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
-				rdev->stats.mem_mgmt_op_err;
-		stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
-				rdev->stats.remote_invalid_req_err;
-		stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
-				rdev->stats.remote_access_err;
-		stats->value[BNXT_RE_REMOTE_OP_ERR] =
-				rdev->stats.remote_op_err;
-		stats->value[BNXT_RE_DUP_REQ] =
-				rdev->stats.dup_req;
-		stats->value[BNXT_RE_RES_EXCEED_MAX] =
-				rdev->stats.res_exceed_max;
-		stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
-				rdev->stats.res_length_mismatch;
-		stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
-				rdev->stats.res_exceeds_wqe;
-		stats->value[BNXT_RE_RES_OPCODE_ERR] =
-				rdev->stats.res_opcode_err;
-		stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
-				rdev->stats.res_rx_invalid_rkey;
-		stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
-				rdev->stats.res_rx_domain_err;
-		stats->value[BNXT_RE_RES_RX_NO_PERM] =
-				rdev->stats.res_rx_no_perm;
-		stats->value[BNXT_RE_RES_RX_RANGE_ERR]  =
-				rdev->stats.res_rx_range_err;
-		stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
-				rdev->stats.res_tx_invalid_rkey;
-		stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
-				rdev->stats.res_tx_domain_err;
-		stats->value[BNXT_RE_RES_TX_NO_PERM] =
-				rdev->stats.res_tx_no_perm;
-		stats->value[BNXT_RE_RES_TX_RANGE_ERR]  =
-				rdev->stats.res_tx_range_err;
-		stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
-				rdev->stats.res_irrq_oflow;
-		stats->value[BNXT_RE_RES_UNSUP_OPCODE]  =
-				rdev->stats.res_unsup_opcode;
-		stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
-				rdev->stats.res_unaligned_atomic;
-		stats->value[BNXT_RE_RES_REM_INV_ERR]   =
-				rdev->stats.res_rem_inv_err;
-		stats->value[BNXT_RE_RES_MEM_ERROR] =
-				rdev->stats.res_mem_error;
-		stats->value[BNXT_RE_RES_SRQ_ERR] =
-				rdev->stats.res_srq_err;
-		stats->value[BNXT_RE_RES_CMP_ERR] =
-				rdev->stats.res_cmp_err;
-		stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
-				rdev->stats.res_invalid_dup_rkey;
-		stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
-				rdev->stats.res_wqe_format_err;
-		stats->value[BNXT_RE_RES_CQ_LOAD_ERR]   =
-				rdev->stats.res_cq_load_err;
-		stats->value[BNXT_RE_RES_SRQ_LOAD_ERR]  =
-				rdev->stats.res_srq_load_err;
-		stats->value[BNXT_RE_RES_TX_PCI_ERR]    =
-				rdev->stats.res_tx_pci_err;
-		stats->value[BNXT_RE_RES_RX_PCI_ERR]    =
-				rdev->stats.res_rx_pci_err;
-		stats->value[BNXT_RE_OUT_OF_SEQ_ERR]    =
-				rdev->stats.res_oos_drop_count;
+			goto done;
+		}
+		if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) &&
+		    !rdev->is_virtfn) {
+			rc = bnxt_re_get_ext_stat(rdev, stats);
+			if (rc) {
+				clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+					  &rdev->flags);
+				goto done;
+			}
+		}
+		bnxt_re_copy_err_stats(rdev, stats, err_s);
 	}
 
-	return ARRAY_SIZE(bnxt_re_stat_name);
+done:
+	return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+		BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS;
 }
 
 struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
 						     u32 port_num)
 {
-	BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS);
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	int num_counters = 0; /* number of leading bnxt_re_stat_name[] entries handed to the allocator */
+
+	if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
+		num_counters = BNXT_RE_NUM_EXT_COUNTERS; /* gen-P5: whole table, extended counters included */
+	else
+		num_counters = BNXT_RE_NUM_STD_COUNTERS; /* otherwise: stop after BNXT_RE_OUT_OF_SEQ_ERR */
 
 	return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
-					  ARRAY_SIZE(bnxt_re_stat_name),
+					  num_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -41,6 +41,8 @@
 #define __BNXT_RE_HW_STATS_H__
 
 enum bnxt_re_hw_stats {
+	BNXT_RE_ACTIVE_PD,
+	BNXT_RE_ACTIVE_AH,
 	BNXT_RE_ACTIVE_QP,
 	BNXT_RE_ACTIVE_SRQ,
 	BNXT_RE_ACTIVE_CQ,
@@ -93,7 +95,31 @@ enum bnxt_re_hw_stats {
 	BNXT_RE_RES_TX_PCI_ERR,
 	BNXT_RE_RES_RX_PCI_ERR,
 	BNXT_RE_OUT_OF_SEQ_ERR,
-	BNXT_RE_NUM_COUNTERS
+	BNXT_RE_TX_ATOMIC_REQ,
+	BNXT_RE_TX_READ_REQ,
+	BNXT_RE_TX_READ_RES,
+	BNXT_RE_TX_WRITE_REQ,
+	BNXT_RE_TX_SEND_REQ,
+	BNXT_RE_RX_ATOMIC_REQ,
+	BNXT_RE_RX_READ_REQ,
+	BNXT_RE_RX_READ_RESP,
+	BNXT_RE_RX_WRITE_REQ,
+	BNXT_RE_RX_SEND_REQ,
+	BNXT_RE_RX_ROCE_GOOD_PKTS,
+	BNXT_RE_RX_ROCE_GOOD_BYTES,
+	BNXT_RE_OOB,
+	BNXT_RE_NUM_EXT_COUNTERS
+};
+
+#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) /* enum entries 0..BNXT_RE_OUT_OF_SEQ_ERR, i.e. all that precede BNXT_RE_TX_ATOMIC_REQ */
+
+struct bnxt_re_rstat { /* counter snapshots kept per device inside struct bnxt_re_stats */
+	struct bnxt_qplib_roce_stats    errs; /* destination passed to bnxt_qplib_get_roce_stats() */
+	struct bnxt_qplib_ext_stat      ext_stat; /* destination passed to bnxt_qplib_qext_stat() */
+};
+
+struct bnxt_re_stats { /* statistics state embedded in struct bnxt_re_dev as ->stats */
+	struct bnxt_re_rstat            rstat; /* roce_stats and ext_stat snapshots */
 };
 
 struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -541,9 +541,12 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_
 
 	bnxt_re_destroy_fence_mr(pd);
 
-	if (pd->qplib_pd.id)
-		bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
-				      &pd->qplib_pd);
+	if (pd->qplib_pd.id) {
+		if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+					   &rdev->qplib_res.pd_tbl,
+					   &pd->qplib_pd))
+			atomic_dec(&rdev->pd_count);
+	}
 	return 0;
 }
 
@@ -595,6 +598,8 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd,
 		if (bnxt_re_create_fence_mr(pd))
 			ibdev_warn(&rdev->ibdev,
 				   "Failed to create Fence-MR\n");
+	atomic_inc(&rdev->pd_count);
+
 	return 0;
 dbfail:
 	bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -611,6 +616,8 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_
 
 	bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
 			      !(flags & RDMA_DESTROY_AH_SLEEPABLE));
+	atomic_dec(&rdev->ah_count);
+
 	return 0;
 }
 
@@ -695,6 +702,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_a
 		wmb(); /* make sure cache is updated. */
 		spin_unlock_irqrestore(&uctx->sh_lock, flag);
 	}
+	atomic_inc(&rdev->ah_count);
 
 	return 0;
 }
@@ -760,6 +768,7 @@ static int bnxt_re_destroy_gsi_sqp(struc
 	bnxt_qplib_destroy_ah(&rdev->qplib_res,
 			      &gsi_sah->qplib_ah,
 			      true);
+	atomic_dec(&rdev->ah_count);
 	bnxt_qplib_clean_qp(&qp->qplib_qp);
 
 	ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
@@ -1006,6 +1015,7 @@ static struct bnxt_re_ah *bnxt_re_create
 			  "Failed to allocate HW AH for Shadow QP");
 		goto fail;
 	}
+	atomic_inc(&rdev->ah_count);
 
 	return ah;
 
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -127,6 +127,8 @@ static int bnxt_re_setup_chip_ctx(struct
 
 	rdev->qplib_res.cctx = rdev->chip_ctx;
 	rdev->rcfw.res = &rdev->qplib_res;
+	rdev->qplib_res.dattr = &rdev->dev_attr;
+	rdev->qplib_res.is_vf = BNXT_VF(bp);
 
 	bnxt_re_set_drv_mode(rdev, wqe_mode);
 	if (bnxt_qplib_determine_atomics(en_dev->pdev))
@@ -777,6 +779,8 @@ static struct bnxt_re_dev *bnxt_re_dev_a
 	atomic_set(&rdev->srq_count, 0);
 	atomic_set(&rdev->mr_count, 0);
 	atomic_set(&rdev->mw_count, 0);
+	atomic_set(&rdev->ah_count, 0);
+	atomic_set(&rdev->pd_count, 0);
 	rdev->cosq[0] = 0xFFFF;
 	rdev->cosq[1] = 0xFFFF;
 
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1049,6 +1049,9 @@ int bnxt_qplib_create_qp(struct bnxt_qpl
 		qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
 	if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
 		qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
+	if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf)
+		qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
+
 	req.qp_flags = cpu_to_le32(qp_flags);
 
 	/* ORRQ and IRRQ */
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -253,14 +253,15 @@ struct bnxt_qplib_ctx {
 struct bnxt_qplib_res {
 	struct pci_dev			*pdev;
 	struct bnxt_qplib_chip_ctx	*cctx;
+	struct bnxt_qplib_dev_attr      *dattr; /* main.c points this at &rdev->dev_attr; read in bnxt_qplib_create_qp() */
 	struct net_device		*netdev;
-
 	struct bnxt_qplib_rcfw		*rcfw;
 	struct bnxt_qplib_pd_tbl	pd_tbl;
 	struct bnxt_qplib_sgid_tbl	sgid_tbl;
 	struct bnxt_qplib_pkey_tbl	pkey_tbl;
 	struct bnxt_qplib_dpi_tbl	dpi_tbl;
 	bool				prio;
+	bool                            is_vf; /* main.c sets this from BNXT_VF(bp); when true, QPs are created without EXT_STATS_ENABLED */
 };
 
 static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
@@ -450,4 +451,10 @@ static inline void bnxt_qplib_ring_nq_db
 	else
 		bnxt_qplib_ring_db32(info, arm);
 }
+
+static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
+{ /* True when the CREQ_QUERY_FUNC_RESP_SB_EXT_STATS bit is set in @dev_cap_flags. */
+	return dev_cap_flags &
+		CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
+}
 #endif /* __BNXT_QPLIB_RES_H__ */
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -161,6 +161,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_
 	attr->l2_db_size = (sb->l2_db_space_size + 1) *
 			    (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
 	attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED;
+	attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
 
 	bnxt_qplib_query_version(rcfw, attr->fw_ver);
 
@@ -867,5 +868,55 @@ int bnxt_qplib_get_roce_stats(struct bnx
 
 bail:
 	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+	return rc;
+}
+
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+			 struct bnxt_qplib_ext_stat *estat)
+{ /* Send QUERY_ROCE_STATS_EXT for function @fid and decode the side-buffer reply into @estat. */
+	struct creq_query_roce_stats_ext_resp resp = {};
+	struct creq_query_roce_stats_ext_resp_sb *sb;
+	struct cmdq_query_roce_stats_ext req = {};
+	struct bnxt_qplib_rcfw_sbuf *sbuf;
+	u16 cmd_flags = 0;
+	int rc; /* 0 on success, otherwise the error from bnxt_qplib_rcfw_send_message() */
+
+	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
+	if (!sbuf) {
+		dev_err(&rcfw->pdev->dev,
+			"SP: QUERY_ROCE_STATS_EXT alloc sb failed\n");
+		return -ENOMEM; /* nothing allocated yet, so no cleanup is needed */
+	}
+
+	RCFW_CMD_PREP(req, QUERY_ROCE_STATS_EXT, cmd_flags);
+
+	req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); /* NOTE(review): u8 field set to a rounded byte count - confirm FW does not expect a count of CMDQE units */
+	req.resp_addr = cpu_to_le64(sbuf->dma_addr);
+	req.function_id = cpu_to_le32(fid);
+	req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); /* flag accompanying the function_id set above */
+
+	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+					  (void *)&resp, (void *)sbuf, 0);
+	if (rc)
+		goto bail; /* @estat is left unmodified on failure */
+
+	sb = sbuf->sb;
+	estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts);
+	estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts);
+	estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts);
+	estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts);
+	estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts);
+	estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts);
+	estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts);
+	estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts);
+	estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts);
+	estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts);
+	estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts);
+	estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes);
+	estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts);
+	estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); /* the tx/rx_roce_{pkts,bytes}, *_cnp and rx_ecn_marked members of @estat are not written */
+
+bail:
+	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
 	return rc;
 }
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -71,6 +71,7 @@ struct bnxt_qplib_dev_attr {
 	u32				l2_db_size;
 	u8				tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
 	bool				is_atomic;
+	u16                             dev_cap_flags;
 };
 
 struct bnxt_qplib_pd {
@@ -219,6 +220,30 @@ struct bnxt_qplib_roce_stats {
 	/* port 3 active qps */
 };
 
+struct bnxt_qplib_ext_stat { /* CPU-endian extended counters, decoded by bnxt_qplib_qext_stat() */
+	u64  tx_atomic_req;
+	u64  tx_read_req;
+	u64  tx_read_res;
+	u64  tx_write_req;
+	u64  tx_send_req;
+	u64  tx_roce_pkts; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  tx_roce_bytes; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  rx_atomic_req;
+	u64  rx_read_req;
+	u64  rx_read_res;
+	u64  rx_write_req;
+	u64  rx_send_req;
+	u64  rx_roce_pkts; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  rx_roce_bytes; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  rx_roce_good_pkts;
+	u64  rx_roce_good_bytes;
+	u64  rx_out_of_buffer;
+	u64  rx_out_of_sequence; /* decoded, but not copied out by bnxt_re_copy_ext_stats() */
+	u64  tx_cnp; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  rx_cnp; /* not assigned by bnxt_qplib_qext_stat() */
+	u64  rx_ecn_marked; /* not assigned by bnxt_qplib_qext_stat() */
+};
+
 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
 			struct bnxt_qplib_gid *gid);
@@ -263,4 +288,7 @@ int bnxt_qplib_free_fast_reg_page_list(s
 int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
 int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
 			      struct bnxt_qplib_roce_stats *stats);
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+			 struct bnxt_qplib_ext_stat *estat);
+
 #endif /* __BNXT_QPLIB_SP_H__*/
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -1102,6 +1102,7 @@ struct cmdq_base {
 	#define CMDQ_BASE_OPCODE_MODIFY_CC			   0x8cUL
 	#define CMDQ_BASE_OPCODE_QUERY_CC			   0x8dUL
 	#define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS	   0x8eUL
+	#define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT      0x92UL
 	u8 cmd_size;
 	__le16 flags;
 	__le16 cookie;
@@ -1127,6 +1128,10 @@ struct cmdq_create_qp {
 	#define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE      0x4UL
 	#define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED		   0x8UL
 	#define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
+	#define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED          0x80UL
+	#define CMDQ_CREATE_QP_QP_FLAGS_LAST	\
+		CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED
+
 	u8 type;
 	#define CMDQ_CREATE_QP_TYPE_RC				   0x2UL
 	#define CMDQ_CREATE_QP_TYPE_UD				   0x4UL
@@ -2848,6 +2853,7 @@ struct creq_query_func_resp_sb {
 	__le16 max_qp_wr;
 	__le16 dev_cap_flags;
 	#define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP   0x1UL
+	#define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS                 0x10UL
 	__le32 max_cq;
 	__le32 max_cqe;
 	__le32 max_pd;
@@ -3087,6 +3093,85 @@ struct creq_query_roce_stats_resp_sb {
 	__le64  active_qp_count_p3;
 };
 
+/* cmdq_query_roce_stats_ext (size:192b/24B) */
+struct cmdq_query_roce_stats_ext { /* request built by bnxt_qplib_qext_stat() */
+	u8      opcode;
+	#define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL
+	#define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST            \
+		CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS
+	u8      cmd_size;
+	__le16  flags; /* the driver sets only ..._FLAGS_FUNCTION_ID */
+	#define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID     0x1UL
+	#define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID       0x2UL
+	__le16  cookie;
+	u8      resp_size; /* set from ALIGN(sizeof(side buffer), BNXT_QPLIB_CMDQE_UNITS) */
+	u8      collection_id; /* left zero by the driver */
+	__le64  resp_addr; /* set from sbuf->dma_addr */
+	__le32  function_id; /* set from the caller-supplied fid */
+	#define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK  0xffUL
+	#define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT   0
+	#define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK  0xffff00UL
+	#define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT   8
+	#define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID     0x1000000UL
+	__le32  reserved32;
+};
+
+/* creq_query_roce_stats_ext_resp (size:128b/16B) */
+struct creq_query_roce_stats_ext_resp { /* response object bnxt_qplib_qext_stat() hands to bnxt_qplib_rcfw_send_message() */
+	u8      type;
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK    0x3fUL
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT     0
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT  0x38UL
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST     \
+		CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT
+	u8      status;
+	__le16  cookie;
+	__le32  size;
+	u8      v;
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_V     0x1UL
+	u8      event; /* 0x92 here equals CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT */
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \
+		CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT
+	u8      reserved48[6];
+};
+
+/* creq_query_roce_stats_ext_resp_sb (size:1536b/192B) */
+struct creq_query_roce_stats_ext_resp_sb { /* side-buffer layout; bnxt_qplib_qext_stat() reads it through sbuf->sb */
+	u8      opcode;
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+	#define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \
+		CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT
+	u8      status;
+	__le16  cookie;
+	__le16  flags;
+	u8      resp_size;
+	u8      rsvd;
+	__le64  tx_atomic_req_pkts;
+	__le64  tx_read_req_pkts;
+	__le64  tx_read_res_pkts;
+	__le64  tx_write_req_pkts;
+	__le64  tx_send_req_pkts;
+	__le64  tx_roce_pkts; /* not read by the driver */
+	__le64  tx_roce_bytes; /* not read by the driver */
+	__le64  rx_atomic_req_pkts;
+	__le64  rx_read_req_pkts;
+	__le64  rx_read_res_pkts;
+	__le64  rx_write_req_pkts;
+	__le64  rx_send_req_pkts;
+	__le64  rx_roce_pkts; /* not read by the driver */
+	__le64  rx_roce_bytes; /* not read by the driver */
+	__le64  rx_roce_good_pkts;
+	__le64  rx_roce_good_bytes;
+	__le64  rx_out_of_buffer_pkts;
+	__le64  rx_out_of_sequence_pkts;
+	__le64  tx_cnp_pkts; /* not read by the driver */
+	__le64  rx_cnp_pkts; /* not read by the driver */
+	__le64  rx_ecn_marked_pkts; /* not read by the driver */
+	__le64  tx_cnp_bytes; /* no matching member in struct bnxt_qplib_ext_stat */
+	__le64  rx_cnp_bytes; /* no matching member in struct bnxt_qplib_ext_stat */
+};
+
 /* QP error notification event (16 bytes) */
 struct creq_qp_error_notification {
 	u8 type;