Blob Blame History Raw
From: Erez Shitrit <erezsh@mellanox.com>
Date: Sun, 29 Jul 2018 11:34:52 +0300
Subject: IB/ipoib: Use cancel_delayed_work_sync for neigh-clean task
Patch-mainline: v4.19-rc1
Git-commit: cda8daf17914a261986d6d4b7294599736d5a463
References: bsc#1103992 FATE#326009

The neigh_reap_task is self restarting, but so long as we call
cancel_delayed_work_sync() it will be guaranteed to not be running and
never start again. Thus we don't need to have the racy
IPOIB_STOP_NEIGH_GC bit, or the confusing mismatch of places sometimes
calling flush_workqueue after the cancel.

This fixes a situation where the GC work could have been left running
in some rare situations.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |    1 
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   33 +++++++++---------------------
 2 files changed, 10 insertions(+), 24 deletions(-)

--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -91,7 +91,6 @@ enum {
 	IPOIB_STOP_REAPER	  = 7,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
-	IPOIB_STOP_NEIGH_GC	  = 11,
 	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 	IPOIB_FLAG_DEV_ADDR_SET	  = 13,
 	IPOIB_FLAG_DEV_ADDR_CTRL  = 14,
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1310,9 +1310,6 @@ static void __ipoib_reap_neigh(struct ip
 	int i;
 	LIST_HEAD(remove_list);
 
-	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		return;
-
 	spin_lock_irqsave(&priv->lock, flags);
 
 	htbl = rcu_dereference_protected(ntbl->htbl,
@@ -1324,9 +1321,6 @@ static void __ipoib_reap_neigh(struct ip
 	/* neigh is obsolete if it was idle for two GC periods */
 	dt = 2 * arp_tbl.gc_interval;
 	neigh_obsolete = jiffies - dt;
-	/* handle possible race condition */
-	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		goto out_unlock;
 
 	for (i = 0; i < htbl->size; i++) {
 		struct ipoib_neigh *neigh;
@@ -1364,9 +1358,8 @@ static void ipoib_reap_neigh(struct work
 
 	__ipoib_reap_neigh(priv);
 
-	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
-				   arp_tbl.gc_interval);
+	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+			   arp_tbl.gc_interval);
 }
 
 
@@ -1528,7 +1521,6 @@ static int ipoib_neigh_hash_init(struct
 	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
 	if (!htbl)
 		return -ENOMEM;
-	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	size = roundup_pow_of_two(arp_tbl.gc_thresh3);
 	buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
 	if (!buckets) {
@@ -1543,7 +1535,6 @@ static int ipoib_neigh_hash_init(struct
 	atomic_set(&ntbl->entries, 0);
 
 	/* start garbage collection */
-	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
@@ -1653,15 +1644,11 @@ out_unlock:
 static void ipoib_neigh_hash_uninit(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	int stopped;
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
 	init_completion(&priv->ntbl.deleted);
 
-	/* Stop GC if called at init fail need to cancel work */
-	stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	if (!stopped)
-		cancel_delayed_work(&priv->neigh_reap_task);
+	cancel_delayed_work_sync(&priv->neigh_reap_task);
 
 	ipoib_flush_neighs(priv);
 
@@ -1799,12 +1786,15 @@ int ipoib_dev_init(struct net_device *de
 		if (ipoib_ib_dev_open(dev)) {
 			pr_warn("%s failed to open device\n", dev->name);
 			ret = -ENODEV;
-			goto out_dev_uninit;
+			goto out_hash_uninit;
 		}
 	}
 
 	return 0;
 
+out_hash_uninit:
+	ipoib_neigh_hash_uninit(dev);
+
 out_dev_uninit:
 	ipoib_ib_dev_cleanup(dev);
 
@@ -1834,8 +1824,7 @@ void ipoib_dev_cleanup(struct net_device
 	/* Delete any child interfaces first */
 	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
 		/* Stop GC on child */
-		set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
-		cancel_delayed_work(&cpriv->neigh_reap_task);
+		cancel_delayed_work_sync(&cpriv->neigh_reap_task);
 		unregister_netdevice_queue(cpriv->dev, &head);
 	}
 	unregister_netdevice_many(&head);
@@ -2343,8 +2332,7 @@ register_failed:
 	ib_unregister_event_handler(&priv->event_handler);
 	flush_workqueue(ipoib_workqueue);
 	/* Stop GC if started before flush */
-	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	cancel_delayed_work(&priv->neigh_reap_task);
+	cancel_delayed_work_sync(&priv->neigh_reap_task);
 	flush_workqueue(priv->wq);
 	ipoib_dev_cleanup(priv->dev);
 
@@ -2409,8 +2397,7 @@ static void ipoib_remove_one(struct ib_d
 		rtnl_unlock();
 
 		/* Stop GC */
-		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-		cancel_delayed_work(&priv->neigh_reap_task);
+		cancel_delayed_work_sync(&priv->neigh_reap_task);
 		flush_workqueue(priv->wq);
 
 		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */