Blob Blame History Raw
From: Ilan Tayari <ilant@mellanox.com>
Date: Tue, 18 Apr 2017 16:08:23 +0300
Subject: net/mlx5e: IPSec, Add Innova IPSec offload TX data path
Patch-mainline: v4.13-rc1
Git-commit: 2ac9cfe78223bb88be8cff3b59e0e13551b4e29c
References: bsc#1046303 FATE#322944

In the TX data path, prepend a special metadata ethertype which
instructs the hardware to perform cryptography.

In addition, fill Software-Parser segment in TX descriptor so
that the hardware may parse the ESP protocol, and perform TX
checksum offload on the inner payload.

Support GSO, by providing the inverse of gso_size in the metadata.
This allows the FPGA to update the ESP header (seqno and seqiv) on the
resulting packets, by calculating the packet number within the GSO
back from the TCP sequence number.

Note that for GSO SKBs, the stack does not include an ESP trailer,
unlike the non-GSO case.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h                  |    1 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c      |   27 +
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h      |   10 
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c |  243 ++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h |   13 
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c             |    9 
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c               |   25 -
 7 files changed, 319 insertions(+), 9 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -336,6 +336,7 @@ struct mlx5e_sq_dma {
 
 enum {
 	MLX5E_SQ_STATE_ENABLED,
+	MLX5E_SQ_STATE_IPSEC,
 };
 
 struct mlx5e_sq_wqe_info {
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -399,10 +399,26 @@ void mlx5e_ipsec_cleanup(struct mlx5e_pr
 	priv->ipsec = NULL;
 }
 
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	if (x->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl > 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not support yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
+	.xdo_dev_offload_ok	= mlx5e_ipsec_offload_ok,
 };
 
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
@@ -431,4 +447,15 @@ void mlx5e_ipsec_build_netdev(struct mlx
 
 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) ||
+	    !MLX5_CAP_ETH(mdev, swp_lso)) {
+		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+		return;
+	}
+
+	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+	netdev->features |= NETIF_F_GSO_ESP;
+	netdev->hw_features |= NETIF_F_GSO_ESP;
+	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
 }
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -50,6 +50,11 @@ struct mlx5e_ipsec_sw_stats {
 	atomic64_t ipsec_rx_drop_sp_alloc;
 	atomic64_t ipsec_rx_drop_sadb_miss;
 	atomic64_t ipsec_rx_drop_syndrome;
+	atomic64_t ipsec_tx_drop_bundle;
+	atomic64_t ipsec_tx_drop_no_state;
+	atomic64_t ipsec_tx_drop_not_ip;
+	atomic64_t ipsec_tx_drop_trailer;
+	atomic64_t ipsec_tx_drop_metadata;
 };
 
 struct mlx5e_ipsec {
@@ -60,6 +65,7 @@ struct mlx5e_ipsec {
 	struct mlx5e_ipsec_sw_stats sw_stats;
 };
 
+void mlx5e_ipsec_build_inverse_table(void);
 int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
@@ -69,6 +75,10 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_l
 
 #else
 
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
 static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
 {
 	return 0;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -33,6 +33,7 @@
 
 #include <crypto/aead.h>
 #include <net/xfrm.h>
+#include <net/esp.h>
 
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/ipsec.h"
@@ -48,17 +49,228 @@ struct mlx5e_ipsec_rx_metadata {
 	__be32		sa_handle;
 } __packed;
 
+enum {
+	MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+	MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+struct mlx5e_ipsec_tx_metadata {
+	__be16 mss_inv;         /* 1/MSS in 16bit fixed point, only for LSO */
+	__be16 seq;             /* LSBs of the first TCP seq, only for LSO */
+	u8     esp_next_proto;  /* Next protocol of ESP */
+} __packed;
+
 struct mlx5e_ipsec_metadata {
 	unsigned char syndrome;
 	union {
 		unsigned char raw[5];
 		/* from FPGA to host, on successful decrypt */
 		struct mlx5e_ipsec_rx_metadata rx;
+		/* from host to FPGA */
+		struct mlx5e_ipsec_tx_metadata tx;
 	} __packed content;
 	/* packet type ID field	*/
 	__be16 ethertype;
 } __packed;
 
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+	return mlx5e_ipsec_inverse_table[skb_shinfo(skb)->gso_size];
+}
+
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+	struct mlx5e_ipsec_metadata *mdata;
+	struct ethhdr *eth;
+
+	if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+		return ERR_PTR(-ENOMEM);
+
+	eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+	skb->mac_header -= sizeof(*mdata);
+	mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+	memmove(skb->data, skb->data + sizeof(*mdata),
+		2 * ETH_ALEN);
+
+	eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+	memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+	return mdata;
+}
+
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+	unsigned int alen = crypto_aead_authsize(x->data);
+	struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+	struct iphdr *ipv4hdr = ip_hdr(skb);
+	unsigned int trailer_len;
+	u8 plen;
+	int ret;
+
+	ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+	if (unlikely(ret))
+		return ret;
+
+	trailer_len = alen + plen + 2;
+
+	pskb_trim(skb, skb->len - trailer_len);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+		ip_send_check(ipv4hdr);
+	} else {
+		ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+					     trailer_len);
+	}
+	return 0;
+}
+
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+				struct mlx5_wqe_eth_seg *eseg, u8 mode,
+				struct xfrm_offload *xo)
+{
+	u8 proto;
+
+	/* Tunnel Mode:
+	 * SWP:      OutL3       InL3  InL4
+	 * Pkt: MAC  IP     ESP  IP    L4
+	 *
+	 * Transport Mode:
+	 * SWP:      OutL3       InL4
+	 *           InL3
+	 * Pkt: MAC  IP     ESP  L4
+	 *
+	 * Offsets are in 2-byte words, counting from start of frame
+	 */
+	eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+	if (skb->protocol == htons(ETH_P_IPV6))
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+	if (mode == XFRM_MODE_TUNNEL) {
+		eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+		if (xo->proto == IPPROTO_IPV6) {
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+			proto = inner_ipv6_hdr(skb)->nexthdr;
+		} else {
+			proto = inner_ip_hdr(skb)->protocol;
+		}
+	} else {
+		eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+		if (skb->protocol == htons(ETH_P_IPV6))
+			eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+		proto = xo->proto;
+	}
+	switch (proto) {
+	case IPPROTO_UDP:
+		eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+		/* Fall through */
+	case IPPROTO_TCP:
+		eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+		break;
+	}
+}
+
+static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+{
+	int iv_offset;
+	__be64 seqno;
+
+	/* Place the SN in the IV field */
+	seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+	iv_offset = skb_transport_offset(skb) + sizeof(struct ip_esp_hdr);
+	skb_store_bits(skb, iv_offset, &seqno, 8);
+}
+
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+				     struct mlx5e_ipsec_metadata *mdata,
+				     struct xfrm_offload *xo)
+{
+	struct ip_esp_hdr *esph;
+	struct tcphdr *tcph;
+
+	if (skb_is_gso(skb)) {
+		/* Add LSO metadata indication */
+		esph = ip_esp_hdr(skb);
+		tcph = inner_tcp_hdr(skb);
+		netdev_dbg(skb->dev, "   Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+			   skb->network_header,
+			   skb->transport_header,
+			   skb->inner_network_header,
+			   skb->inner_transport_header);
+		netdev_dbg(skb->dev, "   Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+			   skb->len, skb_shinfo(skb)->gso_size,
+			   ntohs(tcph->source), ntohs(tcph->dest),
+			   ntohl(tcph->seq), ntohl(esph->seq_no));
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+		mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+		mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+	} else {
+		mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+	}
+	mdata->content.tx.esp_next_proto = xo->proto;
+
+	netdev_dbg(skb->dev, "   TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+		   mdata->syndrome, mdata->content.tx.esp_next_proto,
+		   ntohs(mdata->content.tx.mss_inv),
+		   ntohs(mdata->content.tx.seq));
+}
+
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+					  struct mlx5e_tx_wqe *wqe,
+					  struct sk_buff *skb)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct xfrm_offload *xo = xfrm_offload(skb);
+	struct mlx5e_ipsec_metadata *mdata;
+	struct xfrm_state *x;
+
+	if (!xo)
+		return skb;
+
+	if (unlikely(skb->sp->len != 1)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+		goto drop;
+	}
+
+	x = xfrm_input_state(skb);
+	if (unlikely(!x)) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+		goto drop;
+	}
+
+	if (unlikely(!x->xso.offload_handle ||
+		     (skb->protocol != htons(ETH_P_IP) &&
+		      skb->protocol != htons(ETH_P_IPV6)))) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+		goto drop;
+	}
+
+	if (!skb_is_gso(skb))
+		if (unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+			atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+			goto drop;
+		}
+	mdata = mlx5e_ipsec_add_metadata(skb);
+	if (unlikely(IS_ERR(mdata))) {
+		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+		goto drop;
+	}
+	mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+	mlx5e_ipsec_set_iv(skb, xo);
+	mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+	return skb;
+
+drop:
+	kfree_skb(skb);
+	return NULL;
+}
+
 static inline struct xfrm_state *
 mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
 		     struct mlx5e_ipsec_metadata *mdata)
@@ -133,3 +345,34 @@ struct sk_buff *mlx5e_ipsec_handle_rx_sk
 
 	return skb;
 }
+
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+			       netdev_features_t features)
+{
+	struct xfrm_state *x;
+
+	if (skb->sp && skb->sp->len) {
+		x = skb->sp->xvec[0];
+		if (x && x->xso.offload_handle)
+			return true;
+	}
+	return false;
+}
+
+void mlx5e_ipsec_build_inverse_table(void)
+{
+	u16 mss_inv;
+	u32 mss;
+
+	/* Calculate 1/x inverse table for use in GSO data path.
+	 * Using this table, we provide the IPSec accelerator with the value of
+	 * 1/gso_size so that it can infer the position of each segment inside
+	 * the GSO, and increment the ESP sequence number, and generate the IV.
+	 * The HW needs this value in Q0.16 fixed-point number format
+	 */
+	mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+	for (mss = 2; mss < MAX_LSO_MSS; mss++) {
+		mss_inv = ((1ULL << 32) / mss) >> 16;
+		mlx5e_ipsec_inverse_table[mss] = htons(mss_inv);
+	}
+}
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -34,6 +34,8 @@
 #ifndef __MLX5E_IPSEC_RXTX_H__
 #define __MLX5E_IPSEC_RXTX_H__
 
+#ifdef CONFIG_MLX5_EN_IPSEC
+
 #include <linux/skbuff.h>
 #include "en.h"
 
@@ -41,4 +43,13 @@ struct sk_buff *mlx5e_ipsec_handle_rx_sk
 					  struct sk_buff *skb);
 void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
-#endif	/* __MLX5E_IPSEC_RXTX_H__ */
+void mlx5e_ipsec_inverse_table_init(void);
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+			       netdev_features_t features);
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+					  struct mlx5e_tx_wqe *wqe,
+					  struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1113,6 +1113,8 @@ static int mlx5e_alloc_txqsq(struct mlx5
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->max_inline      = params->tx_max_inline;
 	sq->min_inline_mode = params->tx_min_inline_mode;
+	if (MLX5_IPSEC_DEV(c->priv->mdev))
+		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1932,6 +1934,7 @@ static void mlx5e_build_sq_param(struct
 
 	mlx5e_build_sq_param_common(priv, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+	MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -3524,6 +3527,11 @@ static netdev_features_t mlx5e_features_
 	features = vlan_features_check(skb, features);
 	features = vxlan_features_check(skb, features);
 
+#ifdef CONFIG_MLX5_EN_IPSEC
+	if (mlx5e_ipsec_feature_check(skb, netdev, features))
+		return features;
+#endif
+
 	/* Validate if the tunneled packet is being offloaded by HW */
 	if (skb->encapsulation &&
 	    (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -4502,6 +4510,7 @@ static struct mlx5_interface mlx5e_inter
 
 void mlx5e_init(void)
 {
+	mlx5e_ipsec_build_inverse_table();
 	mlx5e_build_ptys2ethtool_map();
 	mlx5_register_interface(&mlx5e_interface);
 }
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -34,6 +34,7 @@
 #include <linux/if_vlan.h>
 #include "en.h"
 #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
 
 #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
 #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq
 	}
 }
 
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+				 struct mlx5e_tx_wqe *wqe, u16 pi)
 {
-	struct mlx5_wq_cyc       *wq   = &sq->wq;
-
-	u16 pi = sq->pc & wq->sz_m1;
-	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
 	struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct
 	u16 ds_cnt;
 	u16 ihs;
 
-	memset(wqe, 0, sizeof(*wqe));
-
 	mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
 
 	if (skb_is_gso(skb)) {
@@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *s
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi = sq->pc & wq->sz_m1;
+	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+	if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+		skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
+		if (unlikely(!skb))
+			return NETDEV_TX_OK;
+	}
+#endif
 
-	return mlx5e_sq_xmit(sq, skb);
+	return mlx5e_sq_xmit(sq, skb, wqe, pi);
 }
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)