Blob Blame History Raw
From: "Andrea Parri (Microsoft)" <parri.andrea@gmail.com>
Date: Tue, 19 Apr 2022 14:23:25 +0200
Patch-mainline: v5.19-rc1
Subject: PCI: hv: Fix synchronization between channel callback and hv_compose_msi_msg()
Git-commit: a765ed47e45166451680ee9af2b9e435c82ec3ba
References: bsc#1204017, bsc#1203860, bsc#1205617

Dexuan wrote:

  "[...]  when we disable AccelNet, the host PCI VSP driver sends a
   PCI_EJECT message first, and the channel callback may set
   hpdev->state to hv_pcichild_ejecting on a different CPU.  This can
   cause hv_compose_msi_msg() to exit from the loop and 'return', and
   the on-stack variable 'ctxt' is invalid.  Now, if the response
   message from the host arrives, the channel callback will try to
   access the invalid 'ctxt' variable, and this may cause a crash."

Schematically:

  Hyper-V sends PCI_EJECT msg
    hv_pci_onchannelcallback()
      state = hv_pcichild_ejecting
                                       hv_compose_msi_msg()
                                         alloc and init comp_pkt
                                         state == hv_pcichild_ejecting
  Hyper-V sends VM_PKT_COMP msg
    hv_pci_onchannelcallback()
      retrieve address of comp_pkt
                                         'free' comp_pkt and return
      comp_pkt->completion_func()

Dexuan also showed how the crash can be triggered after introducing
suitable delays in the driver code, thus validating the 'assumption'
that the host can still normally respond to the guest's compose_msi
request after the host has started to eject the PCI device.

Fix the synchronization by leveraging the requestor lock as follows:

  - Before 'return'-ing in hv_compose_msi_msg(), remove the ID (while
    holding the requestor lock) associated to the completion packet.

  - Retrieve the address *and call ->completion_func() within a same
    (requestor) critical section in hv_pci_onchannelcallback().

Reported-by: Wei Hu <weh@microsoft.com>
Reported-by: Dexuan Cui <decui@microsoft.com>
Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20220419122325.10078-7-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Acked-by: Olaf Hering <ohering@suse.de>
---
 drivers/pci/host/pci-hyperv.c | 33 +++++++--
 1 file changed, 27 insertions(+), 6 deletions(-)

--- a/drivers/pci/host/pci-hyperv.c
+++ b/drivers/pci/host/pci-hyperv.c
@@ -1181,7 +1181,7 @@ static void hv_compose_msi_msg(struct ir
 			struct pci_create_interrupt3 v3;
 		} int_pkts;
 	} __packed ctxt;
-
+	u64 trans_id;
 	u32 size;
 	int ret;
 
@@ -1242,10 +1242,10 @@ static void hv_compose_msi_msg(struct ir
 		goto free_int_desc;
 	}
 
-	ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
-			       size, (unsigned long)&ctxt.pci_pkt,
-			       VM_PKT_DATA_INBAND,
-			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	ret = vmbus_sendpacket_getid(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
+				     size, (unsigned long)&ctxt.pci_pkt,
+				     &trans_id, VM_PKT_DATA_INBAND,
+				     VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret) {
 		dev_err(&hbus->hdev->device,
 			"Sending request for interrupt failed: 0x%x",
@@ -1262,7 +1262,7 @@ static void hv_compose_msi_msg(struct ir
 		if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
 			dev_err_once(&hbus->hdev->device,
 				     "the device has gone\n");
-			goto free_int_desc;
+			goto suse_free_trans_id;
 		}
 
 		/*
@@ -1284,7 +1284,7 @@ static void hv_compose_msi_msg(struct ir
 		if (hpdev->state == hv_pcichild_ejecting) {
 			dev_err_once(&hbus->hdev->device,
 				     "the device is being ejected\n");
-			goto free_int_desc;
+			goto suse_free_trans_id;
 		}
 
 		udelay(100);
@@ -1294,7 +1294,7 @@ static void hv_compose_msi_msg(struct ir
 		dev_err(&hbus->hdev->device,
 			"Request for interrupt failed: 0x%x",
 			comp.comp_pkt.completion_status);
-		goto free_int_desc;
+		goto suse_free_trans_id;
 	}
 
 	/*
@@ -1313,6 +1313,16 @@ static void hv_compose_msi_msg(struct ir
 	put_pcichild(hpdev);
 	return;
 
+suse_free_trans_id:
+	/*
+	 * The completion packet on the stack becomes invalid after 'return';
+	 * remove the ID from the VMbus requestor if the identifier is still
+	 * mapped to/associated with the packet.  (The identifier could have
+	 * been 're-used', i.e., already removed and (re-)mapped.)
+	 *
+	 * Cf. hv_pci_onchannelcallback().
+	 */
+	vmbus_request_addr_match(hbus->hdev->channel, trans_id, (unsigned long)&ctxt.pci_pkt);
 free_int_desc:
 	kfree(int_desc);
 drop_reference:
@@ -2160,6 +2170,7 @@ static void hv_pci_onchannelcallback(voi
 	struct pci_bus_relations2 *bus_rel2;
 	struct pci_dev_incoming *dev_message;
 	struct hv_pci_dev *hpdev;
+	unsigned long flags;
 
 	buffer = kmalloc(bufferlen, GFP_ATOMIC);
 	if (!buffer)
@@ -2194,8 +2205,11 @@ static void hv_pci_onchannelcallback(voi
 		switch (desc->type) {
 		case VM_PKT_COMP:
 
-			req_addr = chan->request_addr_callback(chan, req_id);
+			lock_requestor(chan, flags);
+			req_addr = __vmbus_request_addr_match(chan, req_id,
+							      VMBUS_RQST_ADDR_ANY);
 			if (req_addr == VMBUS_RQST_ERROR) {
+				unlock_requestor(chan, flags);
 				dev_err(&hbus->hdev->device,
 					"Invalid transaction ID %llx\n",
 					req_id);
@@ -2203,9 +2217,17 @@ static void hv_pci_onchannelcallback(voi
 			}
 			comp_packet = (struct pci_packet *)req_addr;
 			response = (struct pci_response *)buffer;
+			/*
+			 * Call ->completion_func() within the critical section to make
+			 * sure that the packet pointer is still valid during the call:
+			 * here 'valid' means that there's a task still waiting for the
+			 * completion, and that the packet data is still on the waiting
+			 * task's stack.  Cf. hv_compose_msi_msg().
+			 */
 			comp_packet->completion_func(comp_packet->compl_ctxt,
 						     response,
 						     bytes_recvd);
+			unlock_requestor(chan, flags);
 			break;
 
 		case VM_PKT_DATA_INBAND: