Blob Blame History Raw
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Fri, 1 Dec 2017 10:14:50 +0100
Subject: s390/qeth: fix GSO throughput regression
Patch-mainline: v4.15-rc3
Git-commit: 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb
References: bsc#1061024 FATE#323301

Using GSO with small MTUs currently results in a substantial throughput
regression - which is caused by how qeth needs to map non-linear skbs
into its IO buffer elements:
compared to a linear skb, each GSO-segmented skb effectively consumes
twice as many buffer elements (ie two instead of one) due to the
additional header-only part. This causes the Output Queue to be
congested with low-utilized IO buffers.

Fix this as follows:
If the MSS is low enough so that a non-SG GSO segmentation produces
order-0 skbs (currently ~3500 byte), opt out from NETIF_F_SG. This is
where we anticipate the biggest savings, since an SG-enabled
GSO segmentation produces skbs that always consume at least two
buffer elements.

Larger MSS values continue to get a SG-enabled GSO segmentation, since
1) the relative overhead of the additional header-only buffer element
becomes less noticeable, and
2) the linearization overhead increases.

With the throughput regression fixed, re-enable NETIF_F_SG by default to
reap the significant CPU savings of GSO.

Fixes: 5722963a8e83 ("qeth: do not turn on SG per default")
Reported-by: Nils Hoppmann <niho@de.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/s390/net/qeth_core.h      |    3 +++
 drivers/s390/net/qeth_core_main.c |   31 +++++++++++++++++++++++++++++++
 drivers/s390/net/qeth_l2_main.c   |    2 ++
 drivers/s390/net/qeth_l3_main.c   |    2 ++
 4 files changed, 38 insertions(+)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1000,6 +1000,9 @@ struct qeth_cmd_buffer *qeth_get_setassp
 int qeth_set_features(struct net_device *, netdev_features_t);
 void qeth_recover_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features);
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
 
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -19,6 +19,11 @@
 #include <linux/mii.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include <linux/skbuff.h>
+
 #include <net/iucv/af_iucv.h>
 #include <net/dsfield.h>
 
@@ -6500,6 +6505,32 @@ netdev_features_t qeth_fix_features(stru
 }
 EXPORT_SYMBOL_GPL(qeth_fix_features);
 
+netdev_features_t qeth_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features)
+{
+	/* GSO segmentation builds skbs with
+	 *	a (small) linear part for the headers, and
+	 *	page frags for the data.
+	 * Compared to a linear skb, the header-only part consumes an
+	 * additional buffer element. This reduces buffer utilization, and
+	 * hurts throughput. So compress small segments into one element.
+	 */
+	if (netif_needs_gso(skb, features)) {
+		/* match skb_segment(): */
+		unsigned int doffset = skb->data - skb_mac_header(skb);
+		unsigned int hsize = skb_shinfo(skb)->gso_size;
+		unsigned int hroom = skb_headroom(skb);
+
+		/* linearize only if resulting skb allocations are order-0: */
+		if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
+			features &= ~NETIF_F_SG;
+	}
+
+	return vlan_features_check(skb, features);
+}
+EXPORT_SYMBOL_GPL(qeth_features_check);
+
 static int __init qeth_core_init(void)
 {
 	int rc;
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -991,6 +991,7 @@ static const struct net_device_ops qeth_
 	.ndo_stop		= qeth_l2_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l2_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l2_set_rx_mode,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -1037,6 +1038,7 @@ static int qeth_l2_setup_netdev(struct q
 	if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
 		card->dev->hw_features = NETIF_F_SG;
 		card->dev->vlan_features = NETIF_F_SG;
+		card->dev->features |= NETIF_F_SG;
 		/* OSA 3S and earlier has no RX/TX support */
 		if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
 			card->dev->hw_features |= NETIF_F_IP_CSUM;
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2923,6 +2923,7 @@ static const struct net_device_ops qeth_
 	.ndo_stop		= qeth_l3_stop,
 	.ndo_get_stats		= qeth_get_stats,
 	.ndo_start_xmit		= qeth_l3_hard_start_xmit,
+	.ndo_features_check	= qeth_features_check,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= qeth_l3_set_multicast_list,
 	.ndo_do_ioctl		= qeth_do_ioctl,
@@ -2963,6 +2964,7 @@ static int qeth_l3_setup_netdev(struct q
 				card->dev->vlan_features = NETIF_F_SG |
 					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
 					NETIF_F_TSO;
+				card->dev->features |= NETIF_F_SG;
 			}
 		}
 	} else if (card->info.type == QETH_CARD_TYPE_IQD) {