From 4c37f6f8ab9a44bf07cd8d0d19773532757db378 Mon Sep 17 00:00:00 2001 From: Vasant Karasulli Date: Mar 20 2024 10:13:15 +0000 Subject: iommu/vt-d: Don't issue ATS Invalidation request when device is disconnected (git-fixes). --- diff --git a/patches.suse/0001-PCI-Make-pci_dev_is_disconnected-helper-public-for-o.patch b/patches.suse/0001-PCI-Make-pci_dev_is_disconnected-helper-public-for-o.patch new file mode 100644 index 0000000..dcc6dba --- /dev/null +++ b/patches.suse/0001-PCI-Make-pci_dev_is_disconnected-helper-public-for-o.patch @@ -0,0 +1,67 @@ +From 39714fd73c6b60a8d27bcc5b431afb0828bf4434 Mon Sep 17 00:00:00 2001 +From: Ethan Zhao +Date: Tue, 5 Mar 2024 20:21:14 +0800 +Subject: [PATCH] PCI: Make pci_dev_is_disconnected() helper public for other + drivers +Git-commit: 39714fd73c6b60a8d27bcc5b431afb0828bf4434 +Patch-mainline: v6.8-rc3 +References: git-fixes + +Make pci_dev_is_disconnected() public so that it can be called from +Intel VT-d driver to quickly fix/workaround the surprise removal +unplug hang issue for those ATS capable devices on PCIe switch downstream +hotplug capable ports. + +Beside pci_device_is_present() function, this one has no config space +access, so is light enough to optimize the normal pure surprise +removal and safe removal flow.
+ +Acked-by: Bjorn Helgaas +Reviewed-by: Dan Carpenter +Tested-by: Haorong Ye +Signed-off-by: Ethan Zhao +Link: https://lore.kernel.org/r/20240301080727.3529832-2-haifeng.zhao@linux.intel.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Acked-by: Vasant Karasulli + +--- + drivers/pci/pci.h | 5 ----- + include/linux/pci.h | 5 +++++ + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h +index e9750b1b19ba..bfc56f7bee1c 100644 +--- a/drivers/pci/pci.h ++++ b/drivers/pci/pci.h +@@ -368,11 +368,6 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) + return 0; + } + +-static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) +-{ +- return dev->error_state == pci_channel_io_perm_failure; +-} +- + /* pci_dev priv_flags */ + #define PCI_DEV_ADDED 0 + #define PCI_DPC_RECOVERED 1 +diff --git a/include/linux/pci.h b/include/linux/pci.h +index 7ab0d13672da..213109d3c601 100644 +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -2517,6 +2517,11 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) + return NULL; + } + ++static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) ++{ ++ return dev->error_state == pci_channel_io_perm_failure; ++} ++ + void pci_request_acs(void); + bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags); + bool pci_acs_path_enabled(struct pci_dev *start, +-- +2.34.1 + diff --git a/patches.suse/0002-iommu-vt-d-Don-t-issue-ATS-Invalidation-request-when.patch b/patches.suse/0002-iommu-vt-d-Don-t-issue-ATS-Invalidation-request-when.patch new file mode 100644 index 0000000..38916b8 --- /dev/null +++ b/patches.suse/0002-iommu-vt-d-Don-t-issue-ATS-Invalidation-request-when.patch @@ -0,0 +1,203 @@ +From 4fc82cd907ac075648789cc3a00877778aa1838b Mon Sep 17 00:00:00 2001 +From: Ethan Zhao +Date: Tue, 5 Mar 2024 20:21:15 +0800 +Subject: [PATCH] iommu/vt-d: Don't issue ATS Invalidation request when device + is disconnected 
+Git-commit: 4fc82cd907ac075648789cc3a00877778aa1838b +Patch-mainline: v6.8-rc3 +References: git-fixes + +For those endpoint devices connect to system via hotplug capable ports, +users could request a hot reset to the device by flapping device's link +through setting the slot's link control register, as pciehp_ist() DLLSC +interrupt sequence response, pciehp will unload the device driver and +then power it off. thus cause an IOMMU device-TLB invalidation (Intel +VT-d spec, or ATS Invalidation in PCIe spec r6.1) request for non-existence +target device to be sent and deadly loop to retry that request after ITE +fault triggered in interrupt context. + +That would cause following continuous hard lockup warning and system hang + +[ 4211.433662] pcieport 0000:17:01.0: pciehp: Slot(108): Link Down +[ 4211.433664] pcieport 0000:17:01.0: pciehp: Slot(108): Card not present +[ 4223.822591] NMI watchdog: Watchdog detected hard LOCKUP on cpu 144 +[ 4223.822622] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S + OE kernel version xxxx +[ 4223.822623] Hardware name: vendorname xxxx 666-106, +BIOS 01.01.02.03.01 05/15/2023 +[ 4223.822623] RIP: 0010:qi_submit_sync+0x2c0/0x490 +[ 4223.822624] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b + 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 1 +0 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 +[ 4223.822624] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 +[ 4223.822625] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 +[ 4223.822625] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 +[ 4223.822625] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 +[ 4223.822626] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 +[ 4223.822626] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 +[ 4223.822626] FS: 0000000000000000(0000) GS:ffffa237ae400000(0000) +knlGS:0000000000000000 +[ 4223.822627] CS: 
0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 4223.822627] CR2: 00007ffe86515d80 CR3: 000002fd3000a001 CR4: 0000000000770ee0 +[ 4223.822627] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 4223.822628] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 +[ 4223.822628] PKRU: 55555554 +[ 4223.822628] Call Trace: +[ 4223.822628] qi_flush_dev_iotlb+0xb1/0xd0 +[ 4223.822628] __dmar_remove_one_dev_info+0x224/0x250 +[ 4223.822629] dmar_remove_one_dev_info+0x3e/0x50 +[ 4223.822629] intel_iommu_release_device+0x1f/0x30 +[ 4223.822629] iommu_release_device+0x33/0x60 +[ 4223.822629] iommu_bus_notifier+0x7f/0x90 +[ 4223.822630] blocking_notifier_call_chain+0x60/0x90 +[ 4223.822630] device_del+0x2e5/0x420 +[ 4223.822630] pci_remove_bus_device+0x70/0x110 +[ 4223.822630] pciehp_unconfigure_device+0x7c/0x130 +[ 4223.822631] pciehp_disable_slot+0x6b/0x100 +[ 4223.822631] pciehp_handle_presence_or_link_change+0xd8/0x320 +[ 4223.822631] pciehp_ist+0x176/0x180 +[ 4223.822631] ? irq_finalize_oneshot.part.50+0x110/0x110 +[ 4223.822632] irq_thread_fn+0x19/0x50 +[ 4223.822632] irq_thread+0x104/0x190 +[ 4223.822632] ? irq_forced_thread_fn+0x90/0x90 +[ 4223.822632] ? irq_thread_check_affinity+0xe0/0xe0 +[ 4223.822633] kthread+0x114/0x130 +[ 4223.822633] ? __kthread_cancel_work+0x40/0x40 +[ 4223.822633] ret_from_fork+0x1f/0x30 +[ 4223.822633] Kernel panic - not syncing: Hard LOCKUP +[ 4223.822634] CPU: 144 PID: 1422 Comm: irq/57-pciehp Kdump: loaded Tainted: G S + OE kernel version xxxx +[ 4223.822634] Hardware name: vendorname xxxx 666-106, +BIOS 01.01.02.03.01 05/15/2023 +[ 4223.822634] Call Trace: +[ 4223.822634] +[ 4223.822635] dump_stack+0x6d/0x88 +[ 4223.822635] panic+0x101/0x2d0 +[ 4223.822635] ? ret_from_fork+0x11/0x30 +[ 4223.822635] nmi_panic.cold.14+0xc/0xc +[ 4223.822636] watchdog_overflow_callback.cold.8+0x6d/0x81 +[ 4223.822636] __perf_event_overflow+0x4f/0xf0 +[ 4223.822636] handle_pmi_common+0x1ef/0x290 +[ 4223.822636] ? 
__set_pte_vaddr+0x28/0x40 +[ 4223.822637] ? flush_tlb_one_kernel+0xa/0x20 +[ 4223.822637] ? __native_set_fixmap+0x24/0x30 +[ 4223.822637] ? ghes_copy_tofrom_phys+0x70/0x100 +[ 4223.822637] ? __ghes_peek_estatus.isra.16+0x49/0xa0 +[ 4223.822637] intel_pmu_handle_irq+0xba/0x2b0 +[ 4223.822638] perf_event_nmi_handler+0x24/0x40 +[ 4223.822638] nmi_handle+0x4d/0xf0 +[ 4223.822638] default_do_nmi+0x49/0x100 +[ 4223.822638] exc_nmi+0x134/0x180 +[ 4223.822639] end_repeat_nmi+0x16/0x67 +[ 4223.822639] RIP: 0010:qi_submit_sync+0x2c0/0x490 +[ 4223.822639] Code: 48 be 00 00 00 00 00 08 00 00 49 85 74 24 20 0f 95 c1 48 8b + 57 10 83 c1 04 83 3c 1a 03 0f 84 a2 01 00 00 49 8b 04 24 8b 70 34 <40> f6 c6 10 + 74 17 49 8b 04 24 8b 80 80 00 00 00 89 c2 d3 fa 41 39 +[ 4223.822640] RSP: 0018:ffffc4f074f0bbb8 EFLAGS: 00000093 +[ 4223.822640] RAX: ffffc4f040059000 RBX: 0000000000000014 RCX: 0000000000000005 +[ 4223.822640] RDX: ffff9f3841315800 RSI: 0000000000000000 RDI: ffff9f38401a8340 +[ 4223.822641] RBP: ffff9f38401a8340 R08: ffffc4f074f0bc00 R09: 0000000000000000 +[ 4223.822641] R10: 0000000000000010 R11: 0000000000000018 R12: ffff9f384005e200 +[ 4223.822641] R13: 0000000000000004 R14: 0000000000000046 R15: 0000000000000004 +[ 4223.822641] ? qi_submit_sync+0x2c0/0x490 +[ 4223.822642] ? qi_submit_sync+0x2c0/0x490 +[ 4223.822642] +[ 4223.822642] qi_flush_dev_iotlb+0xb1/0xd0 +[ 4223.822642] __dmar_remove_one_dev_info+0x224/0x250 +[ 4223.822643] dmar_remove_one_dev_info+0x3e/0x50 +[ 4223.822643] intel_iommu_release_device+0x1f/0x30 +[ 4223.822643] iommu_release_device+0x33/0x60 +[ 4223.822643] iommu_bus_notifier+0x7f/0x90 +[ 4223.822644] blocking_notifier_call_chain+0x60/0x90 +[ 4223.822644] device_del+0x2e5/0x420 +[ 4223.822644] pci_remove_bus_device+0x70/0x110 +[ 4223.822644] pciehp_unconfigure_device+0x7c/0x130 +[ 4223.822644] pciehp_disable_slot+0x6b/0x100 +[ 4223.822645] pciehp_handle_presence_or_link_change+0xd8/0x320 +[ 4223.822645] pciehp_ist+0x176/0x180 +[ 4223.822645] ? 
irq_finalize_oneshot.part.50+0x110/0x110 +[ 4223.822645] irq_thread_fn+0x19/0x50 +[ 4223.822646] irq_thread+0x104/0x190 +[ 4223.822646] ? irq_forced_thread_fn+0x90/0x90 +[ 4223.822646] ? irq_thread_check_affinity+0xe0/0xe0 +[ 4223.822646] kthread+0x114/0x130 +[ 4223.822647] ? __kthread_cancel_work+0x40/0x40 +[ 4223.822647] ret_from_fork+0x1f/0x30 +[ 4223.822647] Kernel Offset: 0x6400000 from 0xffffffff81000000 (relocation +Range: 0xffffffff80000000-0xffffffffbfffffff) + +Such issue could be triggered by all kinds of regular surprise removal +hotplug operation. like: + +1. pull EP(endpoint device) out directly. +2. turn off EP's power. +3. bring the link down. +etc. + +this patch aims to work for regular safe removal and surprise removal +unplug. these hot unplug handling process could be optimized for fix the +ATS Invalidation hang issue by calling pci_dev_is_disconnected() in +function devtlb_invalidation_with_pasid() to check target device state to +avoid sending meaningless ATS Invalidation request to iommu when device is +gone. (see IMPLEMENTATION NOTE in PCIe spec r6.1 section 10.3.1) + +For safe removal, device wouldn't be removed until the whole software +handling process is done, it wouldn't trigger the hard lock up issue +caused by too long ATS Invalidation timeout wait. In safe removal path, +device state isn't set to pci_channel_io_perm_failure in +pciehp_unconfigure_device() by checking 'presence' parameter, calling +pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will return +false there, wouldn't break the function. 
+ +For surprise removal, device state is set to pci_channel_io_perm_failure in +pciehp_unconfigure_device(), means device is already gone (disconnected) +call pci_dev_is_disconnected() in devtlb_invalidation_with_pasid() will +return true to break the function not to send ATS Invalidation request to +the disconnected device blindly, thus avoid to trigger further ITE fault, +and ITE fault will block all invalidation request to be handled. +furthermore retry the timeout request could trigger hard lockup. + +safe removal (present) & surprise removal (not present) + +pciehp_ist() + pciehp_handle_presence_or_link_change() + pciehp_disable_slot() + remove_board() + pciehp_unconfigure_device(presence) { + if (!presence) + pci_walk_bus(parent, pci_dev_set_disconnected, NULL); + } + +this patch works for regular safe removal and surprise removal of ATS +capable endpoint on PCIe switch downstream ports. + +Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") +Reviewed-by: Dan Carpenter +Tested-by: Haorong Ye +Signed-off-by: Ethan Zhao +Link: https://lore.kernel.org/r/20240301080727.3529832-3-haifeng.zhao@linux.intel.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Acked-by: Vasant Karasulli + +--- + drivers/iommu/intel/pasid.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c +index 3239cefa4c33..953592125e4a 100644 +--- a/drivers/iommu/intel/pasid.c ++++ b/drivers/iommu/intel/pasid.c +@@ -214,6 +214,9 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, + if (!info || !info->ats_enabled) + return; + ++ if (pci_dev_is_disconnected(to_pci_dev(dev))) ++ return; ++ + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + pfsid = info->pfsid; +-- +2.34.1 + diff --git a/series.conf b/series.conf index 63183dd..8a3e4ec 100644 --- a/series.conf +++ b/series.conf @@ -45611,6 +45611,8 @@ patches.suse/Bluetooth-mgmt-Remove-leftover-queuing-of-power_off-.patch 
patches.suse/Bluetooth-Remove-superfluous-call-to-hci_conn_check_.patch patches.suse/net-phy-fix-phy_get_internal_delay-accessing-an-empt.patch + patches.suse/0001-PCI-Make-pci_dev_is_disconnected-helper-public-for-o.patch + patches.suse/0002-iommu-vt-d-Don-t-issue-ATS-Invalidation-request-when.patch patches.suse/0001-iommu-amd-Mark-interrupt-as-managed.patch patches.suse/pwm-mediatek-Update-kernel-doc-for-struct-pwm_mediat.patch patches.suse/mmc-tmio-avoid-concurrent-runs-of-mmc_request_done.patch