From: Dongxiao Xu <dongxiao.xu@intel.com>
Subject: [PATCH 2/3] Netback: Multiple tasklets support.
Patch-mainline: n/a

  Currently netback uses a single pair of tasklets for Tx/Rx data transfer.
  A netback tasklet can only run on one CPU at a time, yet it has to serve
  all netfronts, so it has become a performance bottleneck. This patch
  replaces the single pair with multiple tasklet pairs in dom0.

  Assuming dom0 has CPUNR VCPUs, we define CPUNR tasklet pairs (CPUNR for
  Tx and CPUNR for Rx). Each pair of tasklets serves a specific group of
  netfronts. The global and static variables are also duplicated per group
  to avoid spinlock contention.
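
  For illustration only, here is a minimal, userspace-compilable sketch of
  the group-assignment scheme (the real definitions are in the diff below;
  struct xen_netbk_sketch, pick_group() and main() are stand-ins, while the
  xen_netbk array, the per-group nr_groups counter, netbk_nr_groups and the
  selection loop mirror __netif_up()):

	#include <stdio.h>

	#define MAX_GROUPS 8

	/* Stand-in for struct xen_netbk: one Tx/Rx tasklet pair per group. */
	struct xen_netbk_sketch {
		int nr_groups;	/* netfronts currently assigned to this group */
		/* struct tasklet_struct net_tx_tasklet, net_rx_tasklet; */
	};

	static struct xen_netbk_sketch xen_netbk[MAX_GROUPS];
	static unsigned int netbk_nr_groups = 4;

	/* Mirrors __netif_up(): pick the group serving the fewest netfronts. */
	static unsigned int pick_group(void)
	{
		unsigned int group = 0, i;
		int min = xen_netbk[0].nr_groups;

		for (i = 1; i < netbk_nr_groups; i++)
			if (xen_netbk[i].nr_groups < min) {
				group = i;
				min = xen_netbk[i].nr_groups;
			}
		xen_netbk[group].nr_groups++;
		return group;
	}

	int main(void)
	{
		unsigned int i;

		/* Bring up six interfaces; they spread evenly over the groups. */
		for (i = 0; i < 6; i++)
			printf("netfront %u -> group %u\n", i, pick_group());
		return 0;
	}

  Picking the least-loaded group when an interface comes up keeps netfronts
  spread roughly evenly across the tasklet pairs without any rebalancing.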

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>

jb: some cleanups
Acked-by: jbeulich@novell.com
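
For reference: the number of tasklet pairs can be set at module load time via
the new "groups" parameter added below; when left at 0 it defaults to half the
number of online CPUs (rounded up), is capped at MAX_GROUPS, and is halved on
vzalloc() failure until the allocation succeeds.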

--- head.orig/drivers/xen/netback/common.h	2012-01-06 11:10:16.000000000 +0100
+++ head/drivers/xen/netback/common.h	2012-06-08 10:47:27.000000000 +0200
@@ -51,6 +51,7 @@
 typedef struct netif_st {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
+	unsigned int     group;
 	unsigned int     handle;
 
 	u8               fe_dev_addr[6];
@@ -254,6 +255,7 @@ struct xen_netbk {
 
 	struct page **mmap_pages;
 
+	atomic_t nr_groups;
 	unsigned int alloc_index;
 
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
@@ -281,4 +283,8 @@ struct xen_netbk {
 
 	unsigned long mfn_list[MAX_MFN_ALLOC];
 };
+
+extern struct xen_netbk *xen_netbk;
+extern unsigned int netbk_nr_groups;
+
 #endif /* __NETIF__BACKEND__COMMON_H__ */
--- head.orig/drivers/xen/netback/interface.c	2012-02-10 13:34:52.000000000 +0100
+++ head/drivers/xen/netback/interface.c	2012-02-10 13:35:28.000000000 +0100
@@ -56,14 +56,36 @@ module_param_named(queue_length, netbk_q
 
 static void __netif_up(netif_t *netif)
 {
+	unsigned int group = 0;
+	unsigned int min_groups = atomic_read(&xen_netbk[0].nr_groups);
+	unsigned int i;
+
+	/* Find the list which contains least number of domains. */
+	for (i = 1; i < netbk_nr_groups; i++) {
+		unsigned int nr_groups = atomic_read(&xen_netbk[i].nr_groups);
+
+		if (nr_groups < min_groups) {
+			group = i;
+			min_groups = nr_groups;
+		}
+	}
+
+	atomic_inc(&xen_netbk[group].nr_groups);
+	netif->group = group;
+
 	enable_irq(netif->irq);
 	netif_schedule_work(netif);
 }
 
 static void __netif_down(netif_t *netif)
 {
+	struct xen_netbk *netbk = xen_netbk + netif->group;
+
 	disable_irq(netif->irq);
 	netif_deschedule_work(netif);
+
+	netif->group = UINT_MAX;
+	atomic_dec(&netbk->nr_groups);
 }
 
 static int net_open(struct net_device *dev)
@@ -196,6 +218,7 @@ netif_t *netif_alloc(struct device *pare
 
 	netif = netdev_priv(dev);
 	netif->domid  = domid;
+	netif->group = UINT_MAX;
 	netif->handle = handle;
 	netif->can_sg = 1;
 	netif->csum = 1;
--- head.orig/drivers/xen/netback/netback.c	2012-06-08 10:46:11.000000000 +0200
+++ head/drivers/xen/netback/netback.c	2012-06-08 10:47:24.000000000 +0200
@@ -46,10 +46,10 @@
 
 /*define NETBE_DEBUG_INTERRUPT*/
 
-static struct xen_netbk *__read_mostly xen_netbk;
-static const unsigned int netbk_nr_groups = 1;
+struct xen_netbk *__read_mostly xen_netbk;
+unsigned int __read_mostly netbk_nr_groups;
 
-#define GET_GROUP_INDEX(netif) (0)
+#define GET_GROUP_INDEX(netif) ((netif)->group)
 
 static void netif_idx_release(struct xen_netbk *, u16 pending_idx);
 static void make_tx_response(netif_t *netif, 
@@ -151,6 +151,8 @@ MODULE_PARM_DESC(copy_skb, "Copy data re
 static bool MODPARM_permute_returns;
 module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
 MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
+module_param_named(groups, netbk_nr_groups, uint, 0);
+MODULE_PARM_DESC(groups, "Specify the number of tasklet pairs to use");
 
 int netbk_copy_skb_mode;
 
@@ -295,10 +297,16 @@ static void tx_queue_callback(unsigned l
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	netif_t *netif = netdev_priv(dev);
+	unsigned int group = GET_GROUP_INDEX(netif);
 	struct xen_netbk *netbk;
 
 	BUG_ON(skb->dev != dev);
 
+	if (unlikely(group >= netbk_nr_groups)) {
+		BUG_ON(group != UINT_MAX);
+		goto drop;
+	}
+
 	/* Drop the packet if the target domain has no receive buffers. */
 	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
 		goto drop;
@@ -344,7 +352,7 @@ int netif_be_start_xmit(struct sk_buff *
 		}
 	}
 
-	netbk = &xen_netbk[GET_GROUP_INDEX(netif)];
+	netbk = &xen_netbk[group];
 	skb_queue_tail(&netbk->rx_queue, skb);
 	tasklet_schedule(&netbk->net_rx_tasklet);
 
@@ -419,11 +427,13 @@ static u16 netbk_gop_frag(netif_t *netif
 		    (idx = netif_page_index(page)) < MAX_PENDING_REQS &&
 		    (group = netif_page_group(page)) < netbk_nr_groups) {
 			struct pending_tx_info *src_pend;
+			unsigned int grp;
 
 			netbk = &xen_netbk[group];
 			BUG_ON(netbk->mmap_pages[idx] != page);
 			src_pend = &netbk->pending_tx_info[idx];
-			BUG_ON(group != GET_GROUP_INDEX(src_pend->netif));
+			grp = GET_GROUP_INDEX(src_pend->netif);
+			BUG_ON(group != grp && grp != UINT_MAX);
 			copy_gop->source.domid = src_pend->netif->domid;
 			copy_gop->source.u.ref = src_pend->req.gref;
 			copy_gop->flags |= GNTCOPY_source_gref;
@@ -947,7 +957,7 @@ inline static void net_tx_action_dealloc
 		/* Ensure we see all indices enqueued by netif_idx_release(). */
 		smp_rmb();
 
-		if (MODPARM_permute_returns)
+		if (MODPARM_permute_returns && netbk_nr_groups == 1)
 			permute_dealloc_ring(netbk->dealloc_ring, dc, dp);
 
 		while (dc != dp) {
@@ -1558,9 +1568,20 @@ static void netif_page_release(struct pa
 irqreturn_t netif_be_int(int irq, void *dev_id)
 {
 	netif_t *netif = dev_id;
+	unsigned int group = GET_GROUP_INDEX(netif);
+
+	if (unlikely(group >= netbk_nr_groups)) {
+		/*
+		 * Short of having a way to bind the IRQ in disabled mode
+		 * (IRQ_NOAUTOEN), we have to ignore the first invocation(s)
+		 * (before we got assigned to a group).
+		 */
+		BUG_ON(group != UINT_MAX);
+		return IRQ_HANDLED;
+	}
 
 	add_to_net_schedule_list_tail(netif);
-	maybe_schedule_tx_action(GET_GROUP_INDEX(netif));
+	maybe_schedule_tx_action(group);
 
 	if (netif_schedulable(netif) && !netbk_queue_full(netif))
 		netif_wake_queue(netif->dev);
@@ -1677,9 +1698,20 @@ static int __init netback_init(void)
 	if (!is_running_on_xen())
 		return -ENODEV;
 
-	xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
+	group = netbk_nr_groups;
+	if (!netbk_nr_groups)
+		netbk_nr_groups = (num_online_cpus() + 1) / 2;
+	if (netbk_nr_groups > MAX_GROUPS)
+		netbk_nr_groups = MAX_GROUPS;
+
+	do {
+		xen_netbk = vzalloc(netbk_nr_groups * sizeof(*xen_netbk));
+	} while (!xen_netbk && (netbk_nr_groups >>= 1));
 	if (!xen_netbk)
 		return -ENOMEM;
+	if (group && netbk_nr_groups != group)
+		pr_warn("netback: only using %u (instead of %u) groups\n",
+			netbk_nr_groups, group);
 
 	/* We can increase reservation by this much in net_rx_action(). */
 	balloon_update_driver_allowance(netbk_nr_groups * NET_RX_RING_SIZE);