From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Fri, 11 May 2018 18:35:33 +0530
Subject: cxgb4: enable inner header checksum calculation
Patch-mainline: v4.18-rc1
Git-commit: c50ae55e41ee226b96cf204cd1409bc0057b484e
References: bsc#1097585 bsc#1097586 bsc#1097587 bsc#1097588 bsc#1097583 bsc#1097584

Set cntrl bits to indicate whether the inner header checksum
needs to be calculated whenever the packet is an encapsulated
packet, and enable the supported encap features.

Fixes: d0a1299c6bf7 ("cxgb4: add support for vxlan segmentation offload")
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |    9 ++
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |   86 ++++++++++++++++++------
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    5 +
 3 files changed, 81 insertions(+), 19 deletions(-)

--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5617,8 +5617,15 @@ static int init_one(struct pci_dev *pdev
 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 			NETIF_F_HW_TC;
 
-		if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5)
+		if (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5) {
+			netdev->hw_enc_features |= NETIF_F_IP_CSUM |
+						   NETIF_F_IPV6_CSUM |
+						   NETIF_F_RXCSUM |
+						   NETIF_F_GSO_UDP_TUNNEL |
+						   NETIF_F_TSO | NETIF_F_TSO6;
+
 			netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+		}
 
 		if (highdma)
 			netdev->hw_features |= NETIF_F_HIGHDMA;
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1072,12 +1072,27 @@ static void *inline_tx_skb_header(const
 static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
 {
 	int csum_type;
-	const struct iphdr *iph = ip_hdr(skb);
+	bool inner_hdr_csum = false;
+	u16 proto, ver;
 
-	if (iph->version == 4) {
-		if (iph->protocol == IPPROTO_TCP)
+	if (skb->encapsulation &&
+	    (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5))
+		inner_hdr_csum = true;
+
+	if (inner_hdr_csum) {
+		ver = inner_ip_hdr(skb)->version;
+		proto = (ver == 4) ? inner_ip_hdr(skb)->protocol :
+			inner_ipv6_hdr(skb)->nexthdr;
+	} else {
+		ver = ip_hdr(skb)->version;
+		proto = (ver == 4) ? ip_hdr(skb)->protocol :
+			ipv6_hdr(skb)->nexthdr;
+	}
+
+	if (ver == 4) {
+		if (proto == IPPROTO_TCP)
 			csum_type = TX_CSUM_TCPIP;
-		else if (iph->protocol == IPPROTO_UDP)
+		else if (proto == IPPROTO_UDP)
 			csum_type = TX_CSUM_UDPIP;
 		else {
 nocsum:			/*
@@ -1090,19 +1105,29 @@ nocsum:			/*
 		/*
 		 * this doesn't work with extension headers
 		 */
-		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;
-
-		if (ip6h->nexthdr == IPPROTO_TCP)
+		if (proto == IPPROTO_TCP)
 			csum_type = TX_CSUM_TCPIP6;
-		else if (ip6h->nexthdr == IPPROTO_UDP)
+		else if (proto == IPPROTO_UDP)
 			csum_type = TX_CSUM_UDPIP6;
 		else
 			goto nocsum;
 	}
 
 	if (likely(csum_type >= TX_CSUM_TCPIP)) {
-		u64 hdr_len = TXPKT_IPHDR_LEN_V(skb_network_header_len(skb));
-		int eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+		int eth_hdr_len, l4_len;
+		u64 hdr_len;
+
+		if (inner_hdr_csum) {
+			/* This allows checksum offload for all encapsulated
+			 * packets like GRE etc..
+			 */
+			l4_len = skb_inner_network_header_len(skb);
+			eth_hdr_len = skb_inner_network_offset(skb) - ETH_HLEN;
+		} else {
+			l4_len = skb_network_header_len(skb);
+			eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
+		}
+		hdr_len = TXPKT_IPHDR_LEN_V(l4_len);
 
 		if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
 			hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len);
@@ -1273,7 +1298,7 @@ static inline void t6_fill_tnl_lso(struc
 netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	u32 wr_mid, ctrl0, op;
-	u64 cntrl, *end;
+	u64 cntrl, *end, *sgl;
 	int qidx, credits;
 	unsigned int flits, ndesc;
 	struct adapter *adap;
@@ -1443,6 +1468,19 @@ out_free:	dev_kfree_skb_any(skb);
 				 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
 				 TXPKT_IPHDR_LEN_V(l3hdr_len);
 		}
+		sgl = (u64 *)(cpl + 1); /* sgl start here */
+		if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+			/* If current position is already at the end of the
+			 * txq, reset the current to point to start of the queue
+			 * and update the end ptr as well.
+			 */
+			if (sgl == (u64 *)q->q.stat) {
+				int left = (u8 *)end - (u8 *)q->q.stat;
+
+				end = (void *)q->q.desc + left;
+				sgl = (void *)q->q.desc;
+			}
+		}
 		q->tso++;
 		q->tx_cso += ssi->gso_segs;
 	} else {
@@ -1454,6 +1492,7 @@ out_free:	dev_kfree_skb_any(skb);
 		wr->op_immdlen = htonl(FW_WR_OP_V(op) |
 				       FW_WR_IMMDLEN_V(len));
 		cpl = (void *)(wr + 1);
+		sgl = (u64 *)(cpl + 1);
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			cntrl = hwcsum(adap->params.chip, skb) |
 				TXPKT_IPCSUM_DIS_F;
@@ -2164,7 +2203,7 @@ static void cxgb4_sgetim_to_hwtstamp(str
 }
 
 static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
-		   const struct cpl_rx_pkt *pkt)
+		   const struct cpl_rx_pkt *pkt, unsigned long tnl_hdr_len)
 {
 	struct adapter *adapter = rxq->rspq.adap;
 	struct sge *s = &adapter->sge;
@@ -2180,6 +2219,8 @@ static void do_gro(struct sge_eth_rxq *r
 	}
 
 	copy_frags(skb, gl, s->pktshift);
+	if (tnl_hdr_len)
+		skb->csum_level = 1;
 	skb->len = gl->tot_len - s->pktshift;
 	skb->data_len = skb->len;
 	skb->truesize += skb->data_len;
@@ -2311,7 +2352,7 @@ int t4_ethrx_handler(struct sge_rspq *q,
 	struct sge *s = &q->adap->sge;
 	int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
 			    CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
-	u16 err_vec;
+	u16 err_vec, tnl_hdr_len = 0;
 	struct port_info *pi;
 	int ret = 0;
 
@@ -2320,16 +2361,19 @@ int t4_ethrx_handler(struct sge_rspq *q,
 
 	pkt = (const struct cpl_rx_pkt *)rsp;
 	/* Compressed error vector is enabled for T6 only */
-	if (q->adap->params.tp.rx_pkt_encap)
+	if (q->adap->params.tp.rx_pkt_encap) {
 		err_vec = T6_COMPR_RXERR_VEC_G(be16_to_cpu(pkt->err_vec));
-	else
+		tnl_hdr_len = T6_RX_TNLHDR_LEN_G(ntohs(pkt->err_vec));
+	} else {
 		err_vec = be16_to_cpu(pkt->err_vec);
+	}
 
 	csum_ok = pkt->csum_calc && !err_vec &&
 		  (q->netdev->features & NETIF_F_RXCSUM);
-	if ((pkt->l2info & htonl(RXF_TCP_F)) &&
+	if (((pkt->l2info & htonl(RXF_TCP_F)) ||
+	     tnl_hdr_len) &&
 	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
-		do_gro(rxq, si, pkt);
+		do_gro(rxq, si, pkt, tnl_hdr_len);
 		return 0;
 	}
 
@@ -2376,7 +2420,13 @@ int t4_ethrx_handler(struct sge_rspq *q,
 		} else if (pkt->l2info & htonl(RXF_IP_F)) {
 			__sum16 c = (__force __sum16)pkt->csum;
 			skb->csum = csum_unfold(c);
-			skb->ip_summed = CHECKSUM_COMPLETE;
+
+			if (tnl_hdr_len) {
+				skb->ip_summed = CHECKSUM_UNNECESSARY;
+				skb->csum_level = 1;
+			} else {
+				skb->ip_summed = CHECKSUM_COMPLETE;
+			}
 			rxq->stats.rx_cso++;
 		}
 	} else {
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -1229,6 +1229,11 @@ struct cpl_rx_pkt {
 #define T6_COMPR_RXERR_SUM_V(x) ((x) << T6_COMPR_RXERR_SUM_S)
 #define T6_COMPR_RXERR_SUM_F    T6_COMPR_RXERR_SUM_V(1U)
 
+#define T6_RX_TNLHDR_LEN_S    8
+#define T6_RX_TNLHDR_LEN_M    0xFF
+#define T6_RX_TNLHDR_LEN_V(x) ((x) << T6_RX_TNLHDR_LEN_S)
+#define T6_RX_TNLHDR_LEN_G(x) (((x) >> T6_RX_TNLHDR_LEN_S) & T6_RX_TNLHDR_LEN_M)
+
 struct cpl_trace_pkt {
 	u8 opcode;
 	u8 intf;