From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 15 Jun 2017 14:35:34 +0300
Subject: net/mlx4_en: Improve transmit CQ polling
Patch-mainline: v4.13-rc1
Git-commit: cc26a4908682698cafdb5bb917f19840aff1a149
References: bsc#1046299 FATE#322947

Several small performance improvements in TX CQ polling,
including (see the sketch after this list):
- Compiler branch-predictor hints.
- Tighter variable scoping.
- A more direct check of the CQ type.
- A bool instead of an int for a binary indication.
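
Below is a minimal userspace sketch of two of these patterns: the
unlikely() hint on a cold path and a bool "fully drained" return
value. port_up and process_cq are illustrative stand-ins, not the
driver's actual symbols.

  #include <stdbool.h>
  #include <stdio.h>

  /* GCC/Clang branch-predictor hint, as the kernel defines it. */
  #define unlikely(x)	__builtin_expect(!!(x), 0)

  static bool port_up = true;

  /*
   * Returns true when fewer completions than 'budget' were handled,
   * i.e. the queue is fully drained (mirrors "done < budget").
   */
  static bool process_cq(int pending, int budget)
  {
  	int done = 0;

  	/* Cold path: the port is almost always up, so hint the
  	 * compiler to keep the common path as the fall-through. */
  	if (unlikely(!port_up))
  		return true;

  	while (done < pending && done < budget)
  		done++;	/* stand-in for per-CQE work */

  	return done < budget;
  }

  int main(void)
  {
  	printf("drained: %s\n", process_cq(8, 64) ? "yes" : "no");
  	return 0;
  }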

Performance tests:
Tested on ConnectX3Pro, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

Packet-rate tests for both the regular stack and XDP use cases:
No noticeable gain, no degradation.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |   13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -402,8 +402,7 @@ static bool mlx4_en_process_tx_cq(struct
 	struct mlx4_cq *mcq = &cq->mcq;
 	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
 	struct mlx4_cqe *cqe;
-	u16 index;
-	u16 new_index, ring_index, stamp_index;
+	u16 index, ring_index, stamp_index;
 	u32 txbbs_skipped = 0;
 	u32 txbbs_stamp = 0;
 	u32 cons_index = mcq->cons_index;
@@ -418,7 +417,7 @@ static bool mlx4_en_process_tx_cq(struct
 	u32 last_nr_txbb;
 	u32 ring_cons;
 
-	if (!priv->port_up)
+	if (unlikely(!priv->port_up))
 		return true;
 
 	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
@@ -433,6 +432,8 @@ static bool mlx4_en_process_tx_cq(struct
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 			cons_index & size) && (done < budget)) {
+		u16 new_index;
+
 		/*
 		 * make sure we read the CQE after we read the
 		 * ownership bit
@@ -479,7 +480,6 @@ static bool mlx4_en_process_tx_cq(struct
 		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	}
 
-
 	/*
 	 * To prevent CQ overflow we first update CQ consumer and only then
 	 * the ring consumer.
@@ -492,7 +492,7 @@ static bool mlx4_en_process_tx_cq(struct
 	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
 	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
 
-	if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
+	if (cq->type == TX_XDP)
 		return done < budget;
 
 	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
@@ -504,6 +504,7 @@ static bool mlx4_en_process_tx_cq(struct
 		netif_tx_wake_queue(ring->tx_queue);
 		ring->wake_queue++;
 	}
+
 	return done < budget;
 }
 
@@ -524,7 +525,7 @@ int mlx4_en_poll_tx_cq(struct napi_struc
 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 	struct net_device *dev = cq->dev;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	int clean_complete;
+	bool clean_complete;
 
 	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
 	if (!clean_complete)