Blob Blame History Raw
From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001
From: Hari Vyas <hari.vyas@broadcom.com>
Date: Tue, 3 Jul 2018 14:35:41 +0530
Subject: [PATCH] PCI: Fix is_added/is_busmaster race condition
Git-commit: 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76
Patch-mainline: v4.18
References: FATE#326303

When a PCI device is detected, pdev->is_added is set to 1 and proc and
sysfs entries are created.

When the device is removed, pdev->is_added is checked for one and then
device is detached with clearing of proc and sys entries and at end,
pdev->is_added is set to 0.

is_added and is_busmaster are bit fields in pci_dev structure sharing same
memory location.

A strange issue was observed with multiple removal and rescan of a PCIe
NVMe device using sysfs commands where is_added flag was observed as zero
instead of one while removing device and proc,sys entries are not cleared.
This causes issue in later device addition with warning message
"proc_dir_entry" already registered.

Debugging revealed a race condition between the PCI core setting the
is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue
setting the is_busmaster bit in pci_set_master().  As these fields are not
handled atomically, that clears the is_added bit.

Move the is_added bit to a separate private flag variable and use atomic
functions to set and retrieve the device addition state.  This avoids the
race because is_added no longer shares a memory location with is_busmaster.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283
Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
 arch/powerpc/kernel/pci-common.c          |    4 +++-
 arch/powerpc/platforms/powernv/pci-ioda.c |    3 ++-
 arch/powerpc/platforms/pseries/setup.c    |    3 ++-
 drivers/pci/bus.c                         |    6 +++---
 drivers/pci/hotplug/acpiphp_glue.c        |    2 +-
 drivers/pci/pci.h                         |   11 +++++++++++
 drivers/pci/probe.c                       |    4 ++--
 drivers/pci/remove.c                      |    5 +++--
 include/linux/pci.h                       |    1 -
 9 files changed, 27 insertions(+), 12 deletions(-)

--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -42,6 +42,8 @@
 #include <asm/ppc-pci.h>
 #include <asm/eeh.h>
 
+#include "../../../drivers/pci/pci.h"
+
 /* hose_spinlock protects accesses to the the phb_bitmap. */
 static DEFINE_SPINLOCK(hose_spinlock);
 LIST_HEAD(hose_list);
@@ -1106,7 +1108,7 @@ void pcibios_setup_bus_devices(struct pc
 		/* Cardbus can call us to add new devices to a bus, so ignore
 		 * those who are already fully discovered
 		 */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 
 		pcibios_setup_device(dev);
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -46,6 +46,7 @@
 
 #include "powernv.h"
 #include "pci.h"
+#include "../../../../drivers/pci/pci.h"
 
 #define PNV_IODA1_M64_NUM	16	/* Number of M64 BARs	*/
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
@@ -2934,7 +2935,7 @@ static void pnv_pci_ioda_fixup_iov_resou
 	struct pci_dn *pdn;
 	int mul, total_vfs;
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 
 	pdn = pci_get_pdn(pdev);
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
 #include <asm/security_features.h>
 
 #include "pseries.h"
+#include "../../../../drivers/pci/pci.h"
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
@@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resour
 	const int *indexes;
 	struct device_node *dn = pci_device_to_OF_node(pdev);
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 	/*Firmware must support open sriov otherwise dont configure*/
 	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -331,7 +331,7 @@ void pci_bus_add_device(struct pci_dev *
 		return;
 	}
 
-	dev->is_added = 1;
+	pci_dev_assign_added(dev, true);
 }
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
@@ -348,14 +348,14 @@ void pci_bus_add_devices(const struct pc
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip already-added devices */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 		pci_bus_add_device(dev);
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip if device attach failed */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			continue;
 		child = dev->subordinate;
 		if (child)
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -523,7 +523,7 @@ static void enable_slot(struct acpiphp_s
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Assume that newly added devices are powered on already. */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			dev->current_state = PCI_D0;
 	}
 
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -280,6 +280,7 @@ struct pci_sriov {
 
 /* pci_dev priv_flags */
 #define PCI_DEV_DISCONNECTED 0
+#define PCI_DEV_ADDED 1
 
 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
 {
@@ -292,6 +293,16 @@ static inline bool pci_dev_is_disconnect
 	return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags);
 }
 
+static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
+{
+	assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
+}
+
+static inline bool pci_dev_is_added(const struct pci_dev *dev)
+{
+	return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
+}
+
 #ifdef CONFIG_PCI_ATS
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2186,13 +2186,13 @@ int pci_scan_slot(struct pci_bus *bus, i
 	dev = pci_scan_single_device(bus, devfn);
 	if (!dev)
 		return 0;
-	if (!dev->is_added)
+	if (!pci_dev_is_added(dev))
 		nr++;
 
 	for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
 		dev = pci_scan_single_device(bus, devfn + fn);
 		if (dev) {
-			if (!dev->is_added)
+			if (!pci_dev_is_added(dev))
 				nr++;
 			dev->multifunction = 1;
 		}
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -18,11 +18,12 @@ static void pci_stop_dev(struct pci_dev
 {
 	pci_pme_active(dev, false);
 
-	if (dev->is_added) {
+	if (pci_dev_is_added(dev)) {
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
-		dev->is_added = 0;
+
+		pci_dev_assign_added(dev, false);
 	}
 
 	if (dev->bus->self)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -352,7 +352,6 @@ struct pci_dev {
 	unsigned int	transparent:1;	/* Subtractive decode PCI bridge */
 	unsigned int	multifunction:1;/* Part of multi-function device */
 	/* keep track of device state */
-	unsigned int	is_added:1;
 	unsigned int	is_busmaster:1; /* device is busmaster */
 	unsigned int	no_msi:1;	/* device may not use msi */
 	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */