Blob Blame History Raw
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 22 Nov 2019 06:30:02 +0530
Subject: cxgb4: add UDP segmentation offload support
Patch-mainline: v5.5-rc1
Git-commit: 1a2a14fbc7e7df35aebb929ed6cdb1fcf238e9d2
References: jsc#SLE-8389

Implement and export UDP segmentation offload (USO) support for both
NIC and MQPRIO QoS offload Tx path. Update appropriate logic in Tx to
parse GSO info in skb and configure FW_ETH_TX_EO_WR request needed to
perform USO.

v2:
- Remove inline keyword from write_eo_udp_wr() in sge.c. Let the
  compiler decide.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |    1 
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |    1 
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c |    3 
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   11 +
 drivers/net/ethernet/chelsio/cxgb4/sge.c           |  157 ++++++++++++++-------
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h      |   14 +
 6 files changed, 138 insertions(+), 49 deletions(-)

--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -772,6 +772,7 @@ struct sge_eth_txq {                /* s
 	u8 dbqt;                    /* SGE Doorbell Queue Timer in use */
 	unsigned int dbqtimerix;    /* SGE Doorbell Queue Timer Index */
 	unsigned long tso;          /* # of TSO requests */
+	unsigned long uso;          /* # of USO requests */
 	unsigned long tx_cso;       /* # of Tx checksum offloads */
 	unsigned long vlan_ins;     /* # of Tx VLAN insertions */
 	unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2748,6 +2748,7 @@ do { \
 		RL("RxDrops:", stats.rx_drops);
 		RL("RxBadPkts:", stats.bad_rx_pkts);
 		TL("TSO:", tso);
+		TL("USO:", uso);
 		TL("TxCSO:", tx_cso);
 		TL("VLANins:", vlan_ins);
 		TL("TxQFull:", q.stops);
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -91,6 +91,7 @@ static const char stats_strings[][ETH_GS
 	"rx_bg3_frames_trunc    ",
 
 	"tso                    ",
+	"uso                    ",
 	"tx_csum_offload        ",
 	"rx_csum_good           ",
 	"vlan_extractions       ",
@@ -220,6 +221,7 @@ static void get_strings(struct net_devic
  */
 struct queue_port_stats {
 	u64 tso;
+	u64 uso;
 	u64 tx_csum;
 	u64 rx_csum;
 	u64 vlan_ex;
@@ -247,6 +249,7 @@ static void collect_sge_port_stats(const
 	memset(s, 0, sizeof(*s));
 	for (i = 0; i < p->nqsets; i++, rx++, tx++) {
 		s->tso += tx->tso;
+		s->uso += tx->uso;
 		s->tx_csum += tx->tx_cso;
 		s->rx_csum += rx->stats.rx_cso;
 		s->vlan_ex += rx->stats.vlan_ex;
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1136,11 +1136,17 @@ static u16 cxgb_select_queue(struct net_
 
 	if (dev->num_tc) {
 		struct port_info *pi = netdev2pinfo(dev);
+		u8 ver, proto;
+
+		ver = ip_hdr(skb)->version;
+		proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr :
+				     ip_hdr(skb)->protocol;
 
 		/* Send unsupported traffic pattern to normal NIC queues. */
 		txq = netdev_pick_tx(dev, skb, sb_dev);
 		if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
-		    ip_hdr(skb)->protocol != IPPROTO_TCP)
+		    skb->encapsulation ||
+		    (proto != IPPROTO_TCP && proto != IPPROTO_UDP))
 			txq = txq % pi->nqsets;
 
 		return txq;
@@ -5838,7 +5844,8 @@ static void free_some_resources(struct a
 		t4_fw_bye(adapter, adapter->pf);
 }
 
-#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
+		   NETIF_F_GSO_UDP_L4)
 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
 		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 #define SEGMENT_SIZE 128
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -734,6 +734,8 @@ static inline int is_eth_imm(const struc
 	    chip_ver > CHELSIO_T5) {
 		hdrlen = sizeof(struct cpl_tx_tnl_lso);
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		return 0;
 	} else {
 		hdrlen = skb_shinfo(skb)->gso_size ?
 			 sizeof(struct cpl_tx_pkt_lso_core) : 0;
@@ -775,12 +777,20 @@ static inline unsigned int calc_tx_flits
 	 */
 	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
 	if (skb_shinfo(skb)->gso_size) {
-		if (skb->encapsulation && chip_ver > CHELSIO_T5)
+		if (skb->encapsulation && chip_ver > CHELSIO_T5) {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 				 sizeof(struct cpl_tx_tnl_lso);
-		else
+		} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+			u32 pkt_hdrlen;
+
+			pkt_hdrlen = eth_get_headlen(skb->dev, skb->data,
+						     skb_headlen(skb));
+			hdrlen = sizeof(struct fw_eth_tx_eo_wr) +
+				 round_up(pkt_hdrlen, 16);
+		} else {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 				 sizeof(struct cpl_tx_pkt_lso_core);
+		}
 
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
 		flits += (hdrlen / sizeof(__be64));
@@ -1345,6 +1355,25 @@ static inline int cxgb4_validate_skb(str
 	return 0;
 }
 
+static void *write_eo_udp_wr(struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+			     u32 hdr_len)
+{
+	wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
+	wr->u.udpseg.ethlen = skb_network_offset(skb);
+	wr->u.udpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+	wr->u.udpseg.udplen = sizeof(struct udphdr);
+	wr->u.udpseg.rtplen = 0;
+	wr->u.udpseg.r4 = 0;
+	if (skb_shinfo(skb)->gso_size)
+		wr->u.udpseg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+	else
+		wr->u.udpseg.mss = cpu_to_be16(skb->len - hdr_len);
+	wr->u.udpseg.schedpktsize = wr->u.udpseg.mss;
+	wr->u.udpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+	return (void *)(wr + 1);
+}
+
 /**
  *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
@@ -1357,14 +1386,15 @@ static netdev_tx_t cxgb4_eth_xmit(struct
 	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
 	bool ptp_enabled = is_ptp_enabled(skb, dev);
 	unsigned int last_desc, flits, ndesc;
+	u32 wr_mid, ctrl0, op, sgl_off = 0;
 	const struct skb_shared_info *ssi;
+	int len, qidx, credits, ret, left;
 	struct tx_sw_desc *sgl_sdesc;
+	struct fw_eth_tx_eo_wr *eowr;
 	struct fw_eth_tx_pkt_wr *wr;
 	struct cpl_tx_pkt_core *cpl;
-	int len, qidx, credits, ret;
 	const struct port_info *pi;
 	bool immediate = false;
-	u32 wr_mid, ctrl0, op;
 	u64 cntrl, *end, *sgl;
 	struct sge_eth_txq *q;
 	unsigned int chip_ver;
@@ -1469,13 +1499,17 @@ static netdev_tx_t cxgb4_eth_xmit(struct
 	}
 
 	wr = (void *)&q->q.desc[q->q.pidx];
+	eowr = (void *)&q->q.desc[q->q.pidx];
 	wr->equiq_to_len16 = htonl(wr_mid);
 	wr->r3 = cpu_to_be64(0);
-	end = (u64 *)wr + flits;
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+		end = (u64 *)eowr + flits;
+	else
+		end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
 	len += sizeof(*cpl);
-	if (ssi->gso_size) {
+	if (ssi->gso_size && !(ssi->gso_type & SKB_GSO_UDP_L4)) {
 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
 		struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
 
@@ -1507,20 +1541,29 @@ static netdev_tx_t cxgb4_eth_xmit(struct
 			cntrl = hwcsum(adap->params.chip, skb);
 		}
 		sgl = (u64 *)(cpl + 1); /* sgl start here */
-		if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
-			/* If current position is already at the end of the
-			 * txq, reset the current to point to start of the queue
-			 * and update the end ptr as well.
-			 */
-			if (sgl == (u64 *)q->q.stat) {
-				int left = (u8 *)end - (u8 *)q->q.stat;
-
-				end = (void *)q->q.desc + left;
-				sgl = (void *)q->q.desc;
-			}
-		}
 		q->tso++;
 		q->tx_cso += ssi->gso_segs;
+	} else if (ssi->gso_size) {
+		u64 *start;
+		u32 hdrlen;
+
+		hdrlen = eth_get_headlen(dev, skb->data, skb_headlen(skb));
+		len += hdrlen;
+		wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) |
+					     FW_ETH_TX_EO_WR_IMMDLEN_V(len));
+		cpl = write_eo_udp_wr(skb, eowr, hdrlen);
+		cntrl = hwcsum(adap->params.chip, skb);
+
+		start = (u64 *)(cpl + 1);
+		sgl = (u64 *)inline_tx_skb_header(skb, &q->q, (void *)start,
+						  hdrlen);
+		if (unlikely(start > sgl)) {
+			left = (u8 *)end - (u8 *)q->q.stat;
+			end = (void *)q->q.desc + left;
+		}
+		sgl_off = hdrlen;
+		q->uso++;
+		q->tx_cso += ssi->gso_segs;
 	} else {
 		if (ptp_enabled)
 			op = FW_PTP_TX_PKT_WR;
@@ -1537,6 +1580,16 @@ static netdev_tx_t cxgb4_eth_xmit(struct
 		}
 	}
 
+	if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+		/* If current position is already at the end of the
+		 * txq, reset the current to point to start of the queue
+		 * and update the end ptr as well.
+		 */
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+		sgl = (void *)q->q.desc;
+	}
+
 	if (skb_vlan_tag_present(skb)) {
 		q->vlan_ins++;
 		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
@@ -1566,7 +1619,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
 	} else {
-		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0,
+		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, sgl_off,
 				sgl_sdesc->addr);
 		skb_orphan(skb);
 		sgl_sdesc->skb = skb;
@@ -2024,7 +2077,8 @@ static inline u8 ethofld_calc_tx_flits(s
 	u32 wrlen;
 
 	wrlen = sizeof(struct fw_eth_tx_eo_wr) + sizeof(struct cpl_tx_pkt_core);
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
 		wrlen += sizeof(struct cpl_tx_pkt_lso_core);
 
 	wrlen += roundup(hdr_len, 16);
@@ -2032,10 +2086,14 @@ static inline u8 ethofld_calc_tx_flits(s
 	/* Packet headers + WR + CPLs */
 	flits = DIV_ROUND_UP(wrlen, 8);
 
-	if (skb_shinfo(skb)->nr_frags > 0)
-		nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
-	else if (skb->len - hdr_len)
+	if (skb_shinfo(skb)->nr_frags > 0) {
+		if (skb_headlen(skb) - hdr_len)
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+		else
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
+	} else if (skb->len - hdr_len) {
 		nsgl = sgl_len(1);
+	}
 
 	return flits + nsgl;
 }
@@ -2049,16 +2107,16 @@ static inline void *write_eo_wr(struct a
 	struct cpl_tx_pkt_core *cpl;
 	u32 immd_len, wrlen16;
 	bool compl = false;
+	u8 ver, proto;
+
+	ver = ip_hdr(skb)->version;
+	proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr : ip_hdr(skb)->protocol;
 
 	wrlen16 = DIV_ROUND_UP(wrlen, 16);
 	immd_len = sizeof(struct cpl_tx_pkt_core);
-	if (skb_shinfo(skb)->gso_size) {
-		if (skb->encapsulation &&
-		    CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5)
-			immd_len += sizeof(struct cpl_tx_tnl_lso);
-		else
-			immd_len += sizeof(struct cpl_tx_pkt_lso_core);
-	}
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
+		immd_len += sizeof(struct cpl_tx_pkt_lso_core);
 	immd_len += hdr_len;
 
 	if (!eosw_txq->ncompl ||
@@ -2074,23 +2132,27 @@ static inline void *write_eo_wr(struct a
 	wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16_V(wrlen16) |
 					 FW_WR_FLOWID_V(eosw_txq->hwtid));
 	wr->r3 = 0;
-	wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
-	wr->u.tcpseg.ethlen = skb_network_offset(skb);
-	wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
-	wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
-	wr->u.tcpseg.tsclk_tsoff = 0;
-	wr->u.tcpseg.r4 = 0;
-	wr->u.tcpseg.r5 = 0;
-	wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
+	if (proto == IPPROTO_UDP) {
+		cpl = write_eo_udp_wr(skb, wr, hdr_len);
+	} else {
+		wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
+		wr->u.tcpseg.ethlen = skb_network_offset(skb);
+		wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+		wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
+		wr->u.tcpseg.tsclk_tsoff = 0;
+		wr->u.tcpseg.r4 = 0;
+		wr->u.tcpseg.r5 = 0;
+		wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
 
-	if (ssi->gso_size) {
-		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+		if (ssi->gso_size) {
+			struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
 
-		wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
-		cpl = write_tso_wr(adap, skb, lso);
-	} else {
-		wr->u.tcpseg.mss = cpu_to_be16(0xffff);
-		cpl = (void *)(wr + 1);
+			wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
+			cpl = write_tso_wr(adap, skb, lso);
+		} else {
+			wr->u.tcpseg.mss = cpu_to_be16(0xffff);
+			cpl = (void *)(wr + 1);
+		}
 	}
 
 	eosw_txq->cred -= wrlen16;
@@ -4312,7 +4374,10 @@ int t4_sge_alloc_eth_txq(struct adapter
 	txq->q.q_type = CXGB4_TXQ_ETH;
 	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->txq = netdevq;
-	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
+	txq->tso = 0;
+	txq->uso = 0;
+	txq->tx_cso = 0;
+	txq->vlan_ins = 0;
 	txq->mapping_err = 0;
 	txq->dbqt = dbqt;
 
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -536,7 +536,8 @@ struct fw_eth_tx_pkt_wr {
 };
 
 enum fw_eth_tx_eo_type {
-	FW_ETH_TX_EO_TYPE_TCPSEG = 1,
+	FW_ETH_TX_EO_TYPE_UDPSEG = 0,
+	FW_ETH_TX_EO_TYPE_TCPSEG,
 };
 
 struct fw_eth_tx_eo_wr {
@@ -544,6 +545,17 @@ struct fw_eth_tx_eo_wr {
 	__be32 equiq_to_len16;
 	__be64 r3;
 	union fw_eth_tx_eo {
+		struct fw_eth_tx_eo_udpseg {
+			__u8   type;
+			__u8   ethlen;
+			__be16 iplen;
+			__u8   udplen;
+			__u8   rtplen;
+			__be16 r4;
+			__be16 mss;
+			__be16 schedpktsize;
+			__be32 plen;
+		} udpseg;
 		struct fw_eth_tx_eo_tcpseg {
 			__u8   type;
 			__u8   ethlen;