Blob Blame History Raw
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 7 Oct 2020 13:20:43 +0100
Subject: [PATCH 2/5] x86/msi: Only use high bits of MSI address for DMAR unit
Patch-mainline: Never, upstream uses different implementation
References: bsc#1181001, jsc#ECO-3191

The Intel IOMMU has an MSI-like configuration for its interrupt, but
it isn't really MSI. So it gets to abuse the high 32 bits of the address,
and puts the high 24 bits of the extended APIC ID there.

This isn't something that can be used in the general case for real MSIs,
since external devices using the high bits of the address would be
performing writes to actual memory space above 4GiB, not targeted at the
APIC.

Factor the hack out and allow it only to be used when appropriate,
adding a WARN_ON_ONCE() if other MSIs are targeted at an unreachable
APIC ID. In *theory* that should never happen since the compatibility
MSI messages are not supposed to be used with Interrupt Remapping
enabled. In practice, if IR is enabled but some devices aren't within
scope of any given remapping unit, it might happen. But that's a longer
story and this warning is the right thing to do in that case for the
short term.

The x2apic_enabled() check isn't needed because Linux won't bring up
CPUs with higher APIC IDs unless x2apic is enabled anyway.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Acked-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/kernel/apic/msi.c |   36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -25,15 +25,13 @@
 
 static struct irq_domain *msi_default_domain;
 
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg,
+				  bool dmar)
 {
 	struct irq_cfg *cfg = irqd_cfg(data);
 
 	msg->address_hi = MSI_ADDR_BASE_HI;
 
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
-
 	msg->address_lo =
 		MSI_ADDR_BASE_LO |
 		((apic->irq_dest_mode == 0) ?
@@ -51,6 +49,34 @@ static void irq_msi_compose_msg(struct i
 			MSI_DATA_DELIVERY_FIXED :
 			MSI_DATA_DELIVERY_LOWPRI) |
 		MSI_DATA_VECTOR(cfg->vector);
+
+	/*
+	 * Only the IOMMU itself can use the trick of putting the destination
+	 * APIC ID into the high bits of the address. Anything else would
+	 * just be writing to memory if it tried that, and needs IR to
+	 * address higher APIC IDs.
+	 */
+	if (dmar)
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
+	else
+		WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid));
+}
+
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(data, msg, false);
+}
+
+
+/*
+ * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the
+ * high bits of the destination APIC ID. This can't be done in the general
+ * case for MSIs as it would be targeting real memory above 4GiB, not the
+ * APIC.
+ */
+static void dmar_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(data, msg, true);
 }
 
 /*
@@ -186,7 +212,7 @@ static struct irq_chip dmar_msi_controll
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_set_affinity	= msi_domain_set_affinity,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
-	.irq_compose_msi_msg	= irq_msi_compose_msg,
+	.irq_compose_msi_msg	= dmar_msi_compose_msg,
 	.irq_write_msi_msg	= dmar_msi_write_msg,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };