Joerg Roedel 9a527b
From: David Woodhouse <dwmw@amazon.co.uk>
Joerg Roedel 9a527b
Date: Wed, 7 Oct 2020 13:20:43 +0100
Joerg Roedel 9a527b
Subject: [PATCH 2/5] x86/msi: Only use high bits of MSI address for DMAR unit
Joerg Roedel 9a527b
Patch-mainline: Never, upstream uses different implementation
Joerg Roedel 9a527b
References: bsc#1181001, jsc#ECO-3191
Joerg Roedel 9a527b
Joerg Roedel 9a527b
The Intel IOMMU has an MSI-like configuration for its interrupt, but
Joerg Roedel 9a527b
it isn't really MSI. So it gets to abuse the high 32 bits of the address,
Joerg Roedel 9a527b
and puts the high 24 bits of the extended APIC ID there.
Joerg Roedel 9a527b
Joerg Roedel 9a527b
This isn't something that can be used in the general case for real MSIs,
Joerg Roedel 9a527b
since external devices using the high bits of the address would be
Joerg Roedel 9a527b
performing writes to actual memory space above 4GiB, not targeted at the
Joerg Roedel 9a527b
APIC.
Joerg Roedel 9a527b
Joerg Roedel 9a527b
Factor the hack out and allow it only to be used when appropriate,
Joerg Roedel 9a527b
adding a WARN_ON_ONCE() if other MSIs are targeted at an unreachable
Joerg Roedel 9a527b
APIC ID. In *theory* that should never happen since the compatibility
Joerg Roedel 9a527b
MSI messages are not supposed to be used with Interrupt Remapping
Joerg Roedel 9a527b
enabled. In practice, if IR is enabled but some devices aren't within
Joerg Roedel 9a527b
scope of any given remapping unit, it might happen. But that's a longer
Joerg Roedel 9a527b
story and this warning is the right thing to do in that case for the
Joerg Roedel 9a527b
short term.
Joerg Roedel 9a527b
Joerg Roedel 9a527b
The x2apic_enabled() check isn't needed because Linux won't bring up
Joerg Roedel 9a527b
CPUs with higher APIC IDs unless x2apic is enabled anyway.
Joerg Roedel 9a527b
Joerg Roedel 9a527b
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Joerg Roedel 9a527b
Acked-by: Joerg Roedel <jroedel@suse.de>
Joerg Roedel 9a527b
---
Joerg Roedel 9a527b
 arch/x86/kernel/apic/msi.c |   36 +++++++++++++++++++++++++++++++-----
Joerg Roedel 9a527b
 1 file changed, 31 insertions(+), 5 deletions(-)
Joerg Roedel 9a527b
Joerg Roedel 9a527b
--- a/arch/x86/kernel/apic/msi.c
Joerg Roedel 9a527b
+++ b/arch/x86/kernel/apic/msi.c
Joerg Roedel 9a527b
@@ -25,15 +25,13 @@
Joerg Roedel 9a527b
 
Joerg Roedel 9a527b
 static struct irq_domain *msi_default_domain;
Joerg Roedel 9a527b
 
Joerg Roedel 9a527b
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
Joerg Roedel 9a527b
+static void __irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg,
Joerg Roedel 9a527b
+				  bool dmar)
Joerg Roedel 9a527b
 {
Joerg Roedel 9a527b
 	struct irq_cfg *cfg = irqd_cfg(data);
Joerg Roedel 9a527b
 
Joerg Roedel 9a527b
 	msg->address_hi = MSI_ADDR_BASE_HI;
Joerg Roedel 9a527b
 
Joerg Roedel 9a527b
-	if (x2apic_enabled())
Joerg Roedel 9a527b
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
Joerg Roedel 9a527b
-
Joerg Roedel 9a527b
 	msg->address_lo =
Joerg Roedel 9a527b
 		MSI_ADDR_BASE_LO |
Joerg Roedel 9a527b
 		((apic->irq_dest_mode == 0) ?
Joerg Roedel 9a527b
@@ -51,6 +49,34 @@ static void irq_msi_compose_msg(struct i
Joerg Roedel 9a527b
 			MSI_DATA_DELIVERY_FIXED :
Joerg Roedel 9a527b
 			MSI_DATA_DELIVERY_LOWPRI) |
Joerg Roedel 9a527b
 		MSI_DATA_VECTOR(cfg->vector);
Joerg Roedel 9a527b
+
Joerg Roedel 9a527b
+	/*
Joerg Roedel 9a527b
+	 * Only the IOMMU itself can use the trick of putting the destination
Joerg Roedel 9a527b
+	 * APIC ID into the high bits of the address. Anything else would
Joerg Roedel 9a527b
+	 * just be writing to memory if it tried that, and needs IR to
Joerg Roedel 9a527b
+	 * address higher APIC IDs.
Joerg Roedel 9a527b
+	 */
Joerg Roedel 9a527b
+	if (dmar)
Joerg Roedel 9a527b
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
Joerg Roedel 9a527b
+	else
Joerg Roedel 9a527b
+		WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid));
Joerg Roedel 9a527b
+}
Joerg Roedel 9a527b
+
Joerg Roedel 9a527b
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
Joerg Roedel 9a527b
+{
Joerg Roedel 9a527b
+	__irq_msi_compose_msg(data, msg, false);
Joerg Roedel 9a527b
+}
Joerg Roedel 9a527b
+
Joerg Roedel 9a527b
+
Joerg Roedel 9a527b
+/*
Joerg Roedel 9a527b
+ * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
Joerg Roedel 9a527b
+ * high bits of the destination APIC ID. This can't be done in the general
Joerg Roedel 9a527b
+ * case for MSIs as it would be targeting real memory above 4GiB, not the
Joerg Roedel 9a527b
+ * APIC.
Joerg Roedel 9a527b
+ */
Joerg Roedel 9a527b
+static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
Joerg Roedel 9a527b
+{
Joerg Roedel 9a527b
+	__irq_msi_compose_msg(data, msg, true);
Joerg Roedel 9a527b
 }
Joerg Roedel 9a527b
 
Joerg Roedel 9a527b
 /*
Joerg Roedel 9a527b
@@ -186,7 +212,7 @@ static struct irq_chip dmar_msi_controll
Joerg Roedel 9a527b
 	.irq_ack		= irq_chip_ack_parent,
Joerg Roedel 9a527b
 	.irq_set_affinity	= msi_domain_set_affinity,
Joerg Roedel 9a527b
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
Joerg Roedel 9a527b
-	.irq_compose_msi_msg	= irq_msi_compose_msg,
Joerg Roedel 9a527b
+	.irq_compose_msi_msg	= dmar_msi_compose_msg,
Joerg Roedel 9a527b
 	.irq_write_msi_msg	= dmar_msi_write_msg,
Joerg Roedel 9a527b
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
Joerg Roedel 9a527b
 };