Blob Blame History Raw
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Wed, 15 Aug 2018 22:54:49 -0700
Subject: IB/hfi1: Invalid NUMA node information can cause a divide by zero
Patch-mainline: v4.19-rc1
Git-commit: c513de490f808d8480346f9a58e6a4a5f3de12e7
References: bsc#1060463 FATE#323043

If the system BIOS does not supply NUMA node information to the
PCI devices, the NUMA node is selected by choosing the current
node.

This can lead to the following crash:

divide error: 0000 SMP
CPU: 0 PID: 4 Comm: kworker/0:0 Tainted: G          IOE
------------   3.10.0-693.21.1.el7.x86_64 #1
Hardware name: Intel Corporation S2600KP/S2600KP, BIOS
SE5C610.86B.01.01.0005.101720141054 10/17/2014
Workqueue: events work_for_cpu_fn
task: ffff880174480fd0 ti: ffff880174488000 task.ti: ffff880174488000
RIP: 0010: [<ffffffffc020ac69>] hfi1_dev_affinity_init+0x129/0x6a0 [hfi1]
RSP: 0018:ffff88017448bbf8  EFLAGS: 00010246
RAX: 0000000000000011 RBX: ffff88107ffba6c0 RCX: ffff88085c22e130
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880824ad0000
RBP: ffff88017448bc48 R08: 0000000000000011 R09: 0000000000000002
R10: ffff8808582b6ca0 R11: 0000000000003151 R12: ffff8808582b6ca0
R13: ffff8808582b6518 R14: ffff8808582b6010 R15: 0000000000000012
FS:  0000000000000000(0000) GS:ffff88085ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007efc707404f0 CR3: 0000000001a02000 CR4: 00000000001607f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Call Trace:
 hfi1_init_dd+0x14b3/0x27a0 [hfi1]
 ? pcie_capability_write_word+0x46/0x70
 ? hfi1_pcie_init+0xc0/0x200 [hfi1]
 do_init_one+0x153/0x4c0 [hfi1]
 ? sched_clock_cpu+0x85/0xc0
 init_one+0x1b5/0x260 [hfi1]
 local_pci_probe+0x4a/0xb0
 work_for_cpu_fn+0x1a/0x30
 process_one_work+0x17f/0x440
 worker_thread+0x278/0x3c0
 ? manage_workers.isra.24+0x2a0/0x2a0
 kthread+0xd1/0xe0
 ? insert_kthread_work+0x40/0x40
 ret_from_fork+0x77/0xb0
 ? insert_kthread_work+0x40/0x40

If the BIOS is not supplying NUMA information:
  - set the default table count to 1 for all possible nodes
  - select node 0 (instead of current NUMA) node to get consistent
    performance
  - generate an error indicating that the BIOS should be upgraded

Reviewed-by: Gary Leshner <gary.s.leshner@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/infiniband/hw/hfi1/affinity.c |   24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -198,7 +198,7 @@ int node_affinity_init(void)
 		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
 			node = pcibus_to_node(dev->bus);
 			if (node < 0)
-				node = numa_node_id();
+				goto out;
 
 			hfi1_per_node_cntr[node]++;
 		}
@@ -206,6 +206,18 @@ int node_affinity_init(void)
 	}
 
 	return 0;
+
+out:
+	/*
+	 * Invalid PCI NUMA node information found, note it, and populate
+	 * our database 1:1.
+	 */
+	pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
+	pr_err("HFI: System BIOS may need to be upgraded\n");
+	for (node = 0; node < node_affinity.num_possible_nodes; node++)
+		hfi1_per_node_cntr[node] = 1;
+
+	return 0;
 }
 
 static void node_affinity_destroy(struct hfi1_affinity_node *entry)
@@ -622,8 +634,14 @@ int hfi1_dev_affinity_init(struct hfi1_d
 	int curr_cpu, possible, i, ret;
 	bool new_entry = false;
 
-	if (node < 0)
-		node = numa_node_id();
+	/*
+	 * If the BIOS does not have the NUMA node information set, select
+	 * NUMA 0 so we get consistent performance.
+	 */
+	if (node < 0) {
+		dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+		node = 0;
+	}
 	dd->node = node;
 
 	local_mask = cpumask_of_node(dd->node);