Blob Blame History Raw
From: Tal Gilboa <talgi@mellanox.com>
Date: Tue, 24 Apr 2018 13:36:03 +0300
Subject: net/mlx5e: Enable adaptive-TX moderation
Patch-mainline: v4.18-rc1
Git-commit: cbce4f44479856cd4621e3dce531cfb078357b1f
References: bsc#1103990 FATE#326006

Add support for adaptive TX moderation. This greatly reduces TX interrupt
rate and increases bandwidth, mostly for TCP bandwidth over ARM
architecture (below). There is a slight single stream TCP with very large
message sizes degradation (x86). In this case if there's any moderation on
transmitted packets the bandwidth would reduce due to hitting TCP output limit.
Since this is a synthetic case, this is still worth doing.

Performance improvement (ConnectX-4Lx 40GbE, ARM)
TCP 64B bandwidth with 1-50 streams increased 6-35%.
TCP 64B bandwidth with 100-500 streams increased 20-70%.

Performance improvement (ConnectX-5 100GbE, x86)
Bandwidth: increased up to 40% (1024B with 10s of streams).
Interrupt rate: reduced up to 50% (1024B with 1000s of streams).

Performance degradation (ConnectX-5 100GbE, x86)
Bandwidth: up to 10% decrease single stream TCP (1MB message size from
51Gb/s to 47Gb/s).

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h         |    4 
 drivers/net/ethernet/mellanox/mlx5/core/en_dim.c     |   24 ++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |   35 +++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    |   81 +++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c    |   37 ++++++--
 5 files changed, 125 insertions(+), 56 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -241,6 +241,7 @@ struct mlx5e_params {
 	bool vlan_strip_disable;
 	bool scatter_fcs_en;
 	bool rx_dim_enabled;
+	bool tx_dim_enabled;
 	u32 lro_timeout;
 	u32 pflags;
 	struct bpf_prog *xdp_prog;
@@ -330,6 +331,7 @@ enum {
 	MLX5E_SQ_STATE_ENABLED,
 	MLX5E_SQ_STATE_RECOVERING,
 	MLX5E_SQ_STATE_IPSEC,
+	MLX5E_SQ_STATE_AM,
 };
 
 struct mlx5e_sq_wqe_info {
@@ -342,6 +344,7 @@ struct mlx5e_txqsq {
 	/* dirtied @completion */
 	u16                        cc;
 	u32                        dma_fifo_cc;
+	struct net_dim             dim; /* Adaptive Moderation */
 
 	/* dirtied @xmit */
 	u16                        pc ____cacheline_aligned_in_smp;
@@ -1111,4 +1114,5 @@ void mlx5e_build_nic_params(struct mlx5_
 			    u16 max_channels, u16 mtu);
 u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
 void mlx5e_rx_dim_work(struct work_struct *work);
+void mlx5e_tx_dim_work(struct work_struct *work);
 #endif /* __MLX5_EN_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c
@@ -33,16 +33,30 @@
 #include <linux/net_dim.h>
 #include "en.h"
 
+static void
+mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
+			struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
+{
+	mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
+	dim->state = NET_DIM_START_MEASURE;
+}
+
 void mlx5e_rx_dim_work(struct work_struct *work)
 {
-	struct net_dim *dim = container_of(work, struct net_dim,
-					   work);
+	struct net_dim *dim = container_of(work, struct net_dim, work);
 	struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
 	struct net_dim_cq_moder cur_moder =
 		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
 
-	mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq,
-				       cur_moder.usec, cur_moder.pkts);
+	mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
+}
 
-	dim->state = NET_DIM_START_MEASURE;
+void mlx5e_tx_dim_work(struct work_struct *work)
+{
+	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
+	struct net_dim_cq_moder cur_moder =
+		net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+
+	mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);
 }
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -389,14 +389,20 @@ static int mlx5e_set_channels(struct net
 int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
 			       struct ethtool_coalesce *coal)
 {
+	struct net_dim_cq_moder *rx_moder, *tx_moder;
+
 	if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
 		return -EOPNOTSUPP;
 
-	coal->rx_coalesce_usecs       = priv->channels.params.rx_cq_moderation.usec;
-	coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts;
-	coal->tx_coalesce_usecs       = priv->channels.params.tx_cq_moderation.usec;
-	coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts;
-	coal->use_adaptive_rx_coalesce = priv->channels.params.rx_dim_enabled;
+	rx_moder = &priv->channels.params.rx_cq_moderation;
+	coal->rx_coalesce_usecs		= rx_moder->usec;
+	coal->rx_max_coalesced_frames	= rx_moder->pkts;
+	coal->use_adaptive_rx_coalesce	= priv->channels.params.rx_dim_enabled;
+
+	tx_moder = &priv->channels.params.tx_cq_moderation;
+	coal->tx_coalesce_usecs		= tx_moder->usec;
+	coal->tx_max_coalesced_frames	= tx_moder->pkts;
+	coal->use_adaptive_tx_coalesce	= priv->channels.params.tx_dim_enabled;
 
 	return 0;
 }
@@ -438,6 +444,7 @@ mlx5e_set_priv_channels_coalesce(struct
 int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 			       struct ethtool_coalesce *coal)
 {
+	struct net_dim_cq_moder *rx_moder, *tx_moder;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
 	int err = 0;
@@ -463,11 +470,15 @@ int mlx5e_ethtool_set_coalesce(struct ml
 	mutex_lock(&priv->state_lock);
 	new_channels.params = priv->channels.params;
 
-	new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs;
-	new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames;
-	new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs;
-	new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames;
-	new_channels.params.rx_dim_enabled        = !!coal->use_adaptive_rx_coalesce;
+	rx_moder          = &new_channels.params.rx_cq_moderation;
+	rx_moder->usec    = coal->rx_coalesce_usecs;
+	rx_moder->pkts    = coal->rx_max_coalesced_frames;
+	new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+
+	tx_moder          = &new_channels.params.tx_cq_moderation;
+	tx_moder->usec    = coal->tx_coalesce_usecs;
+	tx_moder->pkts    = coal->tx_max_coalesced_frames;
+	new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
@@ -475,7 +486,9 @@ int mlx5e_ethtool_set_coalesce(struct ml
 	}
 	/* we are opened */
 
-	reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
+	reset = (!!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled) ||
+		(!!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled);
+
 	if (!reset) {
 		mlx5e_set_priv_channels_coalesce(priv, coal);
 		priv->channels.params = new_channels.params;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1025,6 +1025,9 @@ static int mlx5e_alloc_txqsq(struct mlx5
 	if (err)
 		goto err_sq_wq_destroy;
 
+	INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
+	sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
+
 	sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
 
 	return 0;
@@ -1188,6 +1191,9 @@ static int mlx5e_open_txqsq(struct mlx5e
 	if (tx_rate)
 		mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
 
+	if (params->tx_dim_enabled)
+		sq->state |= BIT(MLX5E_SQ_STATE_AM);
+
 	return 0;
 
 err_free_txqsq:
@@ -4084,18 +4090,48 @@ static bool slow_pci_heuristic(struct ml
 		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
 }
 
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 {
-	params->tx_cq_moderation.cq_period_mode = cq_period_mode;
+	struct net_dim_cq_moder moder;
+
+	moder.cq_period_mode = cq_period_mode;
+	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+	moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+		moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+	return moder;
+}
 
-	params->tx_cq_moderation.pkts =
-		MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
-	params->tx_cq_moderation.usec =
-		MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+	struct net_dim_cq_moder moder;
 
+	moder.cq_period_mode = cq_period_mode;
+	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+	moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
 	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-		params->tx_cq_moderation.usec =
-			MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+		moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+	return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+	return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+		NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+		NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	if (params->tx_dim_enabled) {
+		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+		params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+	} else {
+		params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+	}
 
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
 			params->tx_cq_moderation.cq_period_mode ==
@@ -4104,30 +4140,12 @@ void mlx5e_set_tx_cq_mode_params(struct
 
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 {
-	params->rx_cq_moderation.cq_period_mode = cq_period_mode;
-
-	params->rx_cq_moderation.pkts =
-		MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
-	params->rx_cq_moderation.usec =
-		MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
-
-	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-		params->rx_cq_moderation.usec =
-			MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
-
 	if (params->rx_dim_enabled) {
-		switch (cq_period_mode) {
-		case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
-			params->rx_cq_moderation =
-				net_dim_get_def_rx_moderation(
-					NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE);
-			break;
-		case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
-		default:
-			params->rx_cq_moderation =
-				net_dim_get_def_rx_moderation(
-					NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE);
-		}
+		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+		params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+	} else {
+		params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
 	}
 
 	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
@@ -4191,6 +4209,7 @@ void mlx5e_build_nic_params(struct mlx5_
 			MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 			MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
+	params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
 	mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -44,6 +44,30 @@ static inline bool mlx5e_channel_no_affi
 	return cpumask_test_cpu(current_cpu, aff);
 }
 
+static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
+{
+	struct net_dim_sample dim_sample;
+
+	if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_AM)))
+		return;
+
+	net_dim_sample(sq->cq.event_ctr, sq->stats.packets, sq->stats.bytes,
+		       &dim_sample);
+	net_dim(&sq->dim, dim_sample);
+}
+
+static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
+{
+	struct net_dim_sample dim_sample;
+
+	if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_AM)))
+		return;
+
+	net_dim_sample(rq->cq.event_ctr, rq->stats.packets, rq->stats.bytes,
+		       &dim_sample);
+	net_dim(&rq->dim, dim_sample);
+}
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -75,18 +99,13 @@ int mlx5e_napi_poll(struct napi_struct *
 	if (unlikely(!napi_complete_done(napi, work_done)))
 		return work_done;
 
-	for (i = 0; i < c->num_tc; i++)
+	for (i = 0; i < c->num_tc; i++) {
+		mlx5e_handle_tx_dim(&c->sq[i]);
 		mlx5e_cq_arm(&c->sq[i].cq);
-
-	if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) {
-		struct net_dim_sample dim_sample;
-		net_dim_sample(c->rq.cq.event_ctr,
-			       c->rq.stats.packets,
-			       c->rq.stats.bytes,
-			       &dim_sample);
-		net_dim(&c->rq.dim, dim_sample);
 	}
 
+	mlx5e_handle_rx_dim(&c->rq);
+
 	mlx5e_cq_arm(&c->rq.cq);
 	mlx5e_cq_arm(&c->icosq.cq);